
Merge pull request #37 from merryChris/master

Implement batch insertion and removal of index entries, and add unit tests
Hui Chen 9 years ago
parent commit dd08355e61

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+# Ignore vim swap and backup files
+*.swo
+*.swp
+*~
+
+# Ignore weibo data file
+testdata/weibo_data.txt

+ 227 - 85
core/indexer.go

@@ -5,6 +5,7 @@ import (
 	"github.com/huichen/wukong/utils"
 	"log"
 	"math"
+	"sort"
 	"sync"
 )
 
@@ -14,8 +15,18 @@ type Indexer struct {
	// Guarded by a read-write lock for safe concurrent access
 	tableLock struct {
 		sync.RWMutex
-		table map[string]*KeywordIndices
-		docs  map[uint64]bool
+		table     map[string]*KeywordIndices
+		docsState map[uint64]int // 0: in the index, 1: pending removal, 2: pending insertion
+	}
+	addCacheLock struct {
+		sync.RWMutex
+		addCachePointer int
+		addCache        types.DocumentsIndex
+	}
+	removeCacheLock struct {
+		sync.RWMutex
+		removeCachePointer int
+		removeCache        types.DocumentsId
 	}
 
 	initOptions types.IndexerInitOptions
@@ -33,6 +44,9 @@ type Indexer struct {
 
// One row of the inverted index table: collects all documents in which a keyword appears, sorted by ascending DocId.
 type KeywordIndices struct {
+	// Marks the lower bound for the binary search in docIds[] during batch insertion
+	lowerBound int
+
	// Whether the slices below are empty depends on the IndexType set at initialization
	docIds      []uint64  // present for all index types
 	frequencies []float32 // IndexType == FrequenciesIndex
@@ -44,87 +58,238 @@ func (indexer *Indexer) Init(options types.IndexerInitOptions) {
 	if indexer.initialized == true {
 		log.Fatal("索引器不能初始化两次")
 	}
+	options.Init()
+	indexer.initOptions = options
 	indexer.initialized = true
 
 	indexer.tableLock.table = make(map[string]*KeywordIndices)
-	indexer.tableLock.docs = make(map[uint64]bool)
-	indexer.initOptions = options
+	indexer.tableLock.docsState = make(map[uint64]int)
+	indexer.addCacheLock.addCache = make([]*types.DocumentIndex, indexer.initOptions.DocCacheSize)
+	indexer.removeCacheLock.removeCache = make([]uint64, indexer.initOptions.DocCacheSize*2)
 	indexer.docTokenLengths = make(map[uint64]float32)
 }
 
-// Add a document to the inverted index table
-func (indexer *Indexer) AddDocument(document *types.DocumentIndex) {
+// Get the DocId of the i-th document in a KeywordIndices
+func (indexer *Indexer) getDocId(ti *KeywordIndices, i int) uint64 {
+	return ti.docIds[i]
+}
+
+// Get the total number of documents in a KeywordIndices
+func (indexer *Indexer) getIndexLength(ti *KeywordIndices) int {
+	return len(ti.docIds)
+}
+
+// Add a document to ADDCACHE
+func (indexer *Indexer) AddDocumentToCache(document *types.DocumentIndex, forceUpdate bool) {
 	if indexer.initialized == false {
 		log.Fatal("索引器尚未初始化")
 	}
 
-	indexer.tableLock.Lock()
-	defer indexer.tableLock.Unlock()
+	indexer.addCacheLock.Lock()
+	if document != nil {
+		indexer.addCacheLock.addCache[indexer.addCacheLock.addCachePointer] = document
+		indexer.addCacheLock.addCachePointer++
+	}
+	if indexer.addCacheLock.addCachePointer >= indexer.initOptions.DocCacheSize || forceUpdate {
+		indexer.tableLock.Lock()
+		position := 0
+		for i := 0; i < indexer.addCacheLock.addCachePointer; i++ {
+			docIndex := indexer.addCacheLock.addCache[i]
+			if docState, ok := indexer.tableLock.docsState[docIndex.DocId]; ok && docState == 0 {
+				if position != i {
+					indexer.addCacheLock.addCache[position], indexer.addCacheLock.addCache[i] =
+						indexer.addCacheLock.addCache[i], indexer.addCacheLock.addCache[position]
+				}
+				indexer.removeCacheLock.Lock()
+				indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] =
+					docIndex.DocId
+				indexer.removeCacheLock.removeCachePointer++
+				indexer.removeCacheLock.Unlock()
+				indexer.tableLock.docsState[docIndex.DocId] = 1
+				indexer.numDocuments--
+				position++
+			} else if !(ok && docState == 1) {
+				// ok && docState == 1 表示等待删除或者删除当前 doc
+				indexer.tableLock.docsState[docIndex.DocId] = 2
+			}
+		}
 
-	// Update the total token length of the document
-	if document.TokenLength != 0 {
-		originalLength, found := indexer.docTokenLengths[document.DocId]
-		indexer.docTokenLengths[document.DocId] = float32(document.TokenLength)
-		if found {
-			indexer.totalTokenLength += document.TokenLength - originalLength
-		} else {
-			indexer.totalTokenLength += document.TokenLength
+		indexer.tableLock.Unlock()
+		if indexer.RemoveDocumentToCache(0, forceUpdate) {
+			position = 0
 		}
+
+		addCachedDocuments := indexer.addCacheLock.addCache[position:indexer.addCacheLock.addCachePointer]
+		indexer.addCacheLock.addCachePointer = position
+		indexer.addCacheLock.Unlock()
+	sort.Stable(addCachedDocuments)
+		indexer.AddDocuments(&addCachedDocuments)
+	} else {
+		indexer.addCacheLock.Unlock()
 	}
+}
 
-	docIdIsNew := true
-	for _, keyword := range document.Keywords {
-		indices, foundKeyword := indexer.tableLock.table[keyword.Text]
-		if !foundKeyword {
-			// If the keyword is not found, add it
-			ti := KeywordIndices{}
-			switch indexer.initOptions.IndexType {
-			case types.LocationsIndex:
-				ti.locations = [][]int{keyword.Starts}
-			case types.FrequenciesIndex:
-				ti.frequencies = []float32{keyword.Frequency}
-			}
-			ti.docIds = []uint64{document.DocId}
-			indexer.tableLock.table[keyword.Text] = &ti
+// Add all documents in ADDCACHE to the inverted index table
+func (indexer *Indexer) AddDocuments(documents *types.DocumentsIndex) {
+	if indexer.initialized == false {
+		log.Fatal("索引器尚未初始化")
+	}
+
+	indexer.tableLock.Lock()
+	defer indexer.tableLock.Unlock()
+	for _, indices := range indexer.tableLock.table {
+		indices.lowerBound = 0
+	}
+
+	// Insert documents in ascending DocId order to minimize element moves in the index
+	for i, document := range *documents {
+		if i < len(*documents)-1 && (*documents)[i].DocId == (*documents)[i+1].DocId {
+			// If duplicate documents were added, keep only the last one (the sort is stable)
+			continue
+		}
+		if docState, ok := indexer.tableLock.docsState[document.DocId]; ok && docState == 1 {
+			// If docState is still 1 at this point, the document is to be removed
 			continue
 		}
 
-		// Find the position to insert at
-		position, found := indexer.searchIndex(
-			indices, 0, indexer.getIndexLength(indices)-1, document.DocId)
-		if found {
-			docIdIsNew = false
+		// Update the total token length of the document
+		if document.TokenLength != 0 {
+			indexer.docTokenLengths[document.DocId] = float32(document.TokenLength)
+			indexer.totalTokenLength += document.TokenLength
+		}
+
+		docIdIsNew := true
+		for _, keyword := range document.Keywords {
+			indices, foundKeyword := indexer.tableLock.table[keyword.Text]
+			if !foundKeyword {
+				// If the keyword is not found, add it
+				ti := KeywordIndices{}
+				switch indexer.initOptions.IndexType {
+				case types.LocationsIndex:
+					ti.locations = [][]int{keyword.Starts}
+				case types.FrequenciesIndex:
+					ti.frequencies = []float32{keyword.Frequency}
+				}
+				ti.docIds = []uint64{document.DocId}
+				indexer.tableLock.table[keyword.Text] = &ti
+				continue
+			}
 
-			// Overwrite the existing index entry
+			// Find the position to insert at; the entry is guaranteed not to exist yet
+			position, _ := indexer.searchIndex(
+				indices, indices.lowerBound, indexer.getIndexLength(indices)-1, document.DocId)
+			indices.lowerBound = position
 			switch indexer.initOptions.IndexType {
 			case types.LocationsIndex:
+				indices.locations = append(indices.locations, []int{})
+				copy(indices.locations[position+1:], indices.locations[position:])
 				indices.locations[position] = keyword.Starts
 			case types.FrequenciesIndex:
+				indices.frequencies = append(indices.frequencies, float32(0))
+				copy(indices.frequencies[position+1:], indices.frequencies[position:])
 				indices.frequencies[position] = keyword.Frequency
 			}
-			continue
+			indices.docIds = append(indices.docIds, 0)
+			copy(indices.docIds[position+1:], indices.docIds[position:])
+			indices.docIds[position] = document.DocId
+		}
+
+		// Update document state and total count
+		if docIdIsNew {
+			indexer.tableLock.docsState[document.DocId] = 0
+			indexer.numDocuments++
 		}
+	}
+}
+
+// Add a document pending removal to REMOVECACHE
+func (indexer *Indexer) RemoveDocumentToCache(docId uint64, forceUpdate bool) bool {
+	if indexer.initialized == false {
+		log.Fatal("索引器尚未初始化")
+	}
 
-		// When the index entry does not exist, insert a new one
-		switch indexer.initOptions.IndexType {
-		case types.LocationsIndex:
-			indices.locations = append(indices.locations, []int{})
-			copy(indices.locations[position+1:], indices.locations[position:])
-			indices.locations[position] = keyword.Starts
-		case types.FrequenciesIndex:
-			indices.frequencies = append(indices.frequencies, float32(0))
-			copy(indices.frequencies[position+1:], indices.frequencies[position:])
-			indices.frequencies[position] = keyword.Frequency
+	indexer.removeCacheLock.Lock()
+	if docId != 0 {
+		indexer.tableLock.Lock()
+		if docState, ok := indexer.tableLock.docsState[docId]; ok && docState == 0 {
+			indexer.removeCacheLock.removeCache[indexer.removeCacheLock.removeCachePointer] = docId
+			indexer.removeCacheLock.removeCachePointer++
+			indexer.tableLock.docsState[docId] = 1
+			indexer.numDocuments--
+		} else if !ok {
+			// Remove a document that is still pending insertion
+			indexer.tableLock.docsState[docId] = 1
 		}
-		indices.docIds = append(indices.docIds, 0)
-		copy(indices.docIds[position+1:], indices.docIds[position:])
-		indices.docIds[position] = document.DocId
+		indexer.tableLock.Unlock()
+	}
+
+	if indexer.removeCacheLock.removeCachePointer > 0 &&
+		(indexer.removeCacheLock.removeCachePointer >= indexer.initOptions.DocCacheSize ||
+			forceUpdate) {
+		removeCachedDocuments := indexer.removeCacheLock.removeCache[:indexer.removeCacheLock.removeCachePointer]
+		indexer.removeCacheLock.removeCachePointer = 0
+		indexer.removeCacheLock.Unlock()
+		sort.Sort(removeCachedDocuments)
+		indexer.RemoveDocuments(&removeCachedDocuments)
+		return true
+	}
+	indexer.removeCacheLock.Unlock()
+	return false
+}
+
+// Remove all documents in REMOVECACHE from the inverted index table
+func (indexer *Indexer) RemoveDocuments(documents *types.DocumentsId) {
+	if indexer.initialized == false {
+		log.Fatal("索引器尚未初始化")
+	}
+
+	indexer.tableLock.Lock()
+	defer indexer.tableLock.Unlock()
+
+	// Update the total token length and delete the document state
+	for _, docId := range *documents {
+		indexer.totalTokenLength -= indexer.docTokenLengths[docId]
+		delete(indexer.docTokenLengths, docId)
+		delete(indexer.tableLock.docsState, docId)
 	}
 
-	// Update the total document count
-	if docIdIsNew {
-		indexer.tableLock.docs[document.DocId] = true
-		indexer.numDocuments++
+	for keyword, indices := range indexer.tableLock.table {
+		indicesTop, indicesPointer := 0, 0
+		documentsPointer := sort.Search(
+			len(*documents), func(i int) bool { return (*documents)[i] >= indices.docIds[0] })
+		// Two-pointer scan performing the batch removal
+		for ; documentsPointer < len(*documents) &&
+			indicesPointer < indexer.getIndexLength(indices); indicesPointer++ {
+			if indices.docIds[indicesPointer] < (*documents)[documentsPointer] {
+				if indicesTop != indicesPointer {
+					switch indexer.initOptions.IndexType {
+					case types.LocationsIndex:
+						indices.locations[indicesTop] = indices.locations[indicesPointer]
+					case types.FrequenciesIndex:
+						indices.frequencies[indicesTop] = indices.frequencies[indicesPointer]
+					}
+					indices.docIds[indicesTop] = indices.docIds[indicesPointer]
+				}
+				indicesTop++
+			} else {
+				documentsPointer++
+			}
+		}
+		if indicesTop != indicesPointer {
+			switch indexer.initOptions.IndexType {
+			case types.LocationsIndex:
+				indices.locations = append(
+					indices.locations[:indicesTop], indices.locations[indicesPointer:]...)
+			case types.FrequenciesIndex:
+				indices.frequencies = append(
+					indices.frequencies[:indicesTop], indices.frequencies[indicesPointer:]...)
+			}
+			indices.docIds = append(
+				indices.docIds[:indicesTop], indices.docIds[indicesPointer:]...)
+		}
+		if len(indices.docIds) == 0 {
+			delete(indexer.tableLock.table, keyword)
+		}
 	}
 }
 
@@ -177,8 +342,7 @@ func (indexer *Indexer) Lookup(
		// Use documents containing the first keyword as the baseline, and scan the other keywords for the same document
 		baseDocId := indexer.getDocId(table[0], indexPointers[0])
 		if docIds != nil {
-			_, found := docIds[baseDocId]
-			if !found {
+			if _, found := docIds[baseDocId]; !found {
 				continue
 			}
 		}
@@ -208,7 +372,7 @@ func (indexer *Indexer) Lookup(
 		}
 
 		if found {
-			if _, ok := indexer.tableLock.docs[baseDocId]; !ok {
+			if docState, ok := indexer.tableLock.docsState[baseDocId]; !ok || docState != 0 {
 				continue
 			}
 			indexedDoc := types.IndexedDocument{}
@@ -233,7 +397,7 @@ func (indexer *Indexer) Lookup(
 				}
 
				// Compute the proximity of the keywords within the document
-				tokenProximity, tokenLocations := computeTokenProximity(table[:len(tokens)], indexPointers, tokens)
+				tokenProximity, tokenLocations := computeTokenProximity(table[:len(tokens)], &indexPointers, tokens)
 				indexedDoc.TokenProximity = int32(tokenProximity)
 				indexedDoc.TokenSnippetLocations = tokenLocations
 
@@ -323,7 +487,7 @@ func (indexer *Indexer) searchIndex(
 //
// Implemented with dynamic programming: compute, in turn, the optimum for the first i tokens at each occurrence position.
// The selected P_i are passed back through the tokenLocations parameter.
-func computeTokenProximity(table []*KeywordIndices, indexPointers []int, tokens []string) (
+func computeTokenProximity(table []*KeywordIndices, indexPointers *[]int, tokens []string) (
 	minTokenProximity int, tokenLocations []int) {
 	minTokenProximity = -1
 	tokenLocations = make([]int, len(tokens))
@@ -337,14 +501,14 @@ func computeTokenProximity(table []*KeywordIndices, indexPointers []int, tokens
	// Initialize the path array
 	path = make([][]int, len(tokens))
 	for i := 1; i < len(path); i++ {
-		path[i] = make([]int, len(table[i].locations[indexPointers[i]]))
+		path[i] = make([]int, len(table[i].locations[(*indexPointers)[i]]))
 	}
 
	// Dynamic programming
-	currentLocations = table[0].locations[indexPointers[0]]
+	currentLocations = table[0].locations[(*indexPointers)[0]]
 	currentMinValues = make([]int, len(currentLocations))
 	for i := 1; i < len(tokens); i++ {
-		nextLocations = table[i].locations[indexPointers[i]]
+		nextLocations = table[i].locations[(*indexPointers)[i]]
 		nextMinValues = make([]int, len(nextLocations))
 		for j, _ := range nextMinValues {
 			nextMinValues[j] = -1
@@ -396,29 +560,7 @@ func computeTokenProximity(table []*KeywordIndices, indexPointers []int, tokens
 		if i != len(tokens)-1 {
 			cursor = path[i+1][cursor]
 		}
-		tokenLocations[i] = table[i].locations[indexPointers[i]][cursor]
+		tokenLocations[i] = table[i].locations[(*indexPointers)[i]][cursor]
 	}
 	return
 }
-
-// Get the DocId of the i-th document in a KeywordIndices
-func (indexer *Indexer) getDocId(ti *KeywordIndices, i int) uint64 {
-	return ti.docIds[i]
-}
-
-// Get the total number of documents in a KeywordIndices
-func (indexer *Indexer) getIndexLength(ti *KeywordIndices) int {
-	return len(ti.docIds)
-}
-
-// Remove a document
-func (indexer *Indexer) RemoveDoc(docId uint64) {
-	if indexer.initialized == false {
-		log.Fatal("排序器尚未初始化")
-	}
-
-	indexer.tableLock.Lock()
-	delete(indexer.tableLock.docs, docId)
-	indexer.numDocuments--
-	indexer.tableLock.Unlock()
-}
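The new write path is cache-first: AddDocumentToCache and RemoveDocumentToCache only fill ADDCACHE and REMOVECACHE, and the buffers are merged into the inverted index when a cache fills up or forceUpdate is passed. A minimal usage sketch against the API added above (driving the indexer directly, as the unit tests below do; in production the engine drives it through its worker goroutines):

package main

import (
	"github.com/huichen/wukong/core"
	"github.com/huichen/wukong/types"
)

func main() {
	var indexer core.Indexer
	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})

	// Buffered only: both documents sit in ADDCACHE, the inverted
	// index itself is untouched so far.
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    1,
		Keywords: []types.KeywordIndex{{"token1", 0, []int{0}}},
	}, false)
	indexer.AddDocumentToCache(&types.DocumentIndex{
		DocId:    2,
		Keywords: []types.KeywordIndex{{"token2", 0, []int{0}}},
	}, false)

	// Queue document 1 for removal, then flush: a nil document with
	// forceUpdate == true drains ADDCACHE, which in turn drains
	// REMOVECACHE, so document 1 never reaches the index.
	indexer.RemoveDocumentToCache(1, false)
	indexer.AddDocumentToCache(nil, true)
}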

+ 217 - 147
core/indexer_test.go

@@ -9,200 +9,259 @@ import (
 func TestAddKeywords(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
-	indexer.AddDocument(&types.DocumentIndex{
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId:    1,
 		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:    7,
-		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
+	}, false)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId:    2,
-		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:    3,
 		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:    1,
-		Keywords: []types.KeywordIndex{{"token1", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
+	}, false)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId:    3,
+		Keywords: []types.KeywordIndex{{"token3", 0, []int{}}},
+	}, false)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId:    7,
+		Keywords: []types.KeywordIndex{{"token7", 0, []int{}}},
+	}, false)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId:    1,
 		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:    2,
-		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
-	})
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:    0,
-		Keywords: []types.KeywordIndex{{"token2", 0, []int{}}},
-	})
+	}, false)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId:    7,
+		Keywords: []types.KeywordIndex{{"token77", 0, []int{}}},
+	}, false)
+	indexer.AddDocumentToCache(nil, true)
 
-	utils.Expect(t, "1 2 7 ", indicesToString(&indexer, "token1"))
-	utils.Expect(t, "0 1 2 3 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "3 ", indicesToString(&indexer, "token3"))
+	utils.Expect(t, "7 ", indicesToString(&indexer, "token77"))
 }
 
-func TestLookup(t *testing.T) {
+func TestRemoveDocument(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
-	// doc0 = "token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 			{"token3", 0, []int{7}},
 		},
-	})
-	// doc1 = "token1 token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
+	}, false)
+	// doc2 = "token1 token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
+		Keywords: []types.KeywordIndex{
+			{"token1", 0, []int{0}},
+			{"token2", 0, []int{7}},
+		},
+	}, true)
+	utils.Expect(t, "2 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 ", indicesToString(&indexer, "token3"))
+
+	indexer.RemoveDocumentToCache(2, false)
+	// doc1 = "token1 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 1,
+		Keywords: []types.KeywordIndex{
+			{"token1", 0, []int{0}},
+			{"token3", 0, []int{7}},
+		},
+	}, true)
+	utils.Expect(t, "1 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 ", indicesToString(&indexer, "token3"))
+
+	// doc2 = "token1 token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
 			{"token3", 0, []int{14}},
 		},
-	})
-	// doc2 = "token1 token2"
-	indexer.AddDocument(&types.DocumentIndex{
+	}, true)
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "2 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token3"))
+
+	// doc3 = "token1 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 3,
+		Keywords: []types.KeywordIndex{
+			{"token1", 0, []int{0}},
+			{"token2", 0, []int{7}},
+		},
+	}, false)
+	indexer.RemoveDocumentToCache(3, false)
+	indexer.AddDocumentToCache(nil, true)
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "2 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 2 ", indicesToString(&indexer, "token3"))
+}
+
+func TestLookupLocationsIndex(t *testing.T) {
+	var indexer Indexer
+	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
+	// doc1 = "token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
+		Keywords: []types.KeywordIndex{
+			{"token2", 0, []int{0}},
+			{"token3", 0, []int{7}},
+		},
+	}, false)
+	// doc2 = "token1 token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 2,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
+			{"token3", 0, []int{14}},
 		},
-	})
-	// doc3 = "token2"
-	indexer.AddDocument(&types.DocumentIndex{
+	}, false)
+	// doc3 = "token1 token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 3,
+		Keywords: []types.KeywordIndex{
+			{"token1", 0, []int{0}},
+			{"token2", 0, []int{7}},
+		},
+	}, false)
+	// doc4 = "token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 4,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 		},
-	})
+	}, false)
 	// doc7 = "token1 token3"
-	indexer.AddDocument(&types.DocumentIndex{
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 7,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token3", 0, []int{7}},
 		},
-	})
+	}, false)
 	// doc9 = "token3"
-	indexer.AddDocument(&types.DocumentIndex{
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 9,
 		Keywords: []types.KeywordIndex{
 			{"token3", 0, []int{0}},
 		},
-	})
+	}, true)
 
-	utils.Expect(t, "1 2 7 ", indicesToString(&indexer, "token1"))
-	utils.Expect(t, "0 1 2 3 ", indicesToString(&indexer, "token2"))
-	utils.Expect(t, "0 1 7 9 ", indicesToString(&indexer, "token3"))
+	utils.Expect(t, "2 3 7 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "1 2 3 4 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 2 7 9 ", indicesToString(&indexer, "token3"))
 
 	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token4"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[7 0 [0]] [2 0 [0]] [1 0 [0]] ",
+	utils.Expect(t, "[7 0 [0]] [3 0 [0]] [2 0 [0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1"}, []string{}, nil, false)))
 	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token1", "token4"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[2 1 [0 7]] [1 1 [0 7]] ",
+	utils.Expect(t, "[3 1 [0 7]] [2 1 [0 7]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token2"}, []string{}, nil, false)))
-	utils.Expect(t, "[2 13 [7 0]] [1 13 [7 0]] ",
+	utils.Expect(t, "[3 13 [7 0]] [2 13 [7 0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token1"}, []string{}, nil, false)))
-	utils.Expect(t, "[7 1 [0 7]] [1 8 [0 14]] ",
+	utils.Expect(t, "[7 1 [0 7]] [2 8 [0 14]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[7 13 [7 0]] [1 20 [14 0]] ",
+	utils.Expect(t, "[7 13 [7 0]] [2 20 [14 0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token1"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 1 [7 14]] [0 1 [0 7]] ",
+	utils.Expect(t, "[2 1 [7 14]] [1 1 [0 7]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 13 [14 7]] [0 13 [7 0]] ",
+	utils.Expect(t, "[2 13 [14 7]] [1 13 [7 0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token2"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[1 2 [0 7 14]] ",
+	utils.Expect(t, "[2 2 [0 7 14]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token2", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 26 [14 7 0]] ",
+	utils.Expect(t, "[2 26 [14 7 0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token2", "token1"}, []string{}, nil, false)))
 }
 
-func TestDocIdsIndex(t *testing.T) {
+func TestLookupDocIdsIndex(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.DocIdsIndex})
-	// doc0 = "token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 			{"token3", 0, []int{7}},
 		},
-	})
-	// doc1 = "token1 token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 1,
+	}, false)
+	// doc2 = "token1 token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
 			{"token3", 0, []int{14}},
 		},
-	})
-	// doc2 = "token1 token2"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 2,
+	}, false)
+	// doc3 = "token1 token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 3,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
 		},
-	})
-	// doc3 = "token2"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 3,
+	}, false)
+	// doc4 = "token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 4,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 		},
-	})
+	}, false)
 	// doc7 = "token1 token3"
-	indexer.AddDocument(&types.DocumentIndex{
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 7,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token3", 0, []int{7}},
 		},
-	})
+	}, false)
 	// doc9 = "token3"
-	indexer.AddDocument(&types.DocumentIndex{
+	indexer.AddDocumentToCache(&types.DocumentIndex{
 		DocId: 9,
 		Keywords: []types.KeywordIndex{
 			{"token3", 0, []int{0}},
 		},
-	})
+	}, true)
 
-	utils.Expect(t, "1 2 7 ", indicesToString(&indexer, "token1"))
-	utils.Expect(t, "0 1 2 3 ", indicesToString(&indexer, "token2"))
-	utils.Expect(t, "0 1 7 9 ", indicesToString(&indexer, "token3"))
+	utils.Expect(t, "2 3 7 ", indicesToString(&indexer, "token1"))
+	utils.Expect(t, "1 2 3 4 ", indicesToString(&indexer, "token2"))
+	utils.Expect(t, "1 2 7 9 ", indicesToString(&indexer, "token3"))
 
 	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token4"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[7 0 []] [2 0 []] [1 0 []] ",
+	utils.Expect(t, "[7 0 []] [3 0 []] [2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1"}, []string{}, nil, false)))
 	utils.Expect(t, "", indexedDocsToString(indexer.Lookup([]string{"token1", "token4"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[2 0 []] [1 0 []] ",
+	utils.Expect(t, "[3 0 []] [2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token2"}, []string{}, nil, false)))
-	utils.Expect(t, "[2 0 []] [1 0 []] ",
+	utils.Expect(t, "[3 0 []] [2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token1"}, []string{}, nil, false)))
-	utils.Expect(t, "[7 0 []] [1 0 []] ",
+	utils.Expect(t, "[7 0 []] [2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[7 0 []] [1 0 []] ",
+	utils.Expect(t, "[7 0 []] [2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token1"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 0 []] [0 0 []] ",
+	utils.Expect(t, "[2 0 []] [1 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 0 []] [0 0 []] ",
+	utils.Expect(t, "[2 0 []] [1 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token2"}, []string{}, nil, false)))
 
-	utils.Expect(t, "[1 0 []] ",
+	utils.Expect(t, "[2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token1", "token2", "token3"}, []string{}, nil, false)))
-	utils.Expect(t, "[1 0 []] ",
+	utils.Expect(t, "[2 0 []] ",
 		indexedDocsToString(indexer.Lookup([]string{"token3", "token2", "token1"}, []string{}, nil, false)))
 }
 
@@ -210,69 +269,69 @@ func TestLookupWithProximity(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
 
-	// doc0 = "token2 token4 token4 token2 token3 token4"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token4 token4 token2 token3 token4"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0, 21}},
 			{"token3", 0, []int{28}},
 			{"token4", 0, []int{7, 14, 35}},
 		},
-	})
-	utils.Expect(t, "[0 1 [21 28]] ",
+	}, true)
+	utils.Expect(t, "[1 1 [21 28]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)))
 
-	// doc0 = "t2 t1 . . . t2 t3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "t2 t1 . . . t2 t3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"t1", 0, []int{3}},
 			{"t2", 0, []int{0, 12}},
 			{"t3", 0, []int{15}},
 		},
-	})
-	utils.Expect(t, "[0 8 [3 12 15]] ",
+	}, true)
+	utils.Expect(t, "[1 8 [3 12 15]] ",
 		indexedDocsToString(indexer.Lookup([]string{"t1", "t2", "t3"}, []string{}, nil, false)))
 
-	// doc0 = "t3 t2 t1 . . . . . t2 t3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "t3 t2 t1 . . . . . t2 t3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"t1", 0, []int{6}},
 			{"t2", 0, []int{3, 19}},
 			{"t3", 0, []int{0, 22}},
 		},
-	})
-	utils.Expect(t, "[0 10 [6 3 0]] ",
+	}, true)
+	utils.Expect(t, "[1 10 [6 3 0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"t1", "t2", "t3"}, []string{}, nil, false)))
 }
 
 func TestLookupWithPartialLocations(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
-	// doc0 = "token2 token4 token4 token2 token3 token4" + "label1"(不在文本中)
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token4 token4 token2 token3 token4" + "label1"(不在文本中)
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0, 21}},
 			{"token3", 0, []int{28}},
 			{"label1", 0, []int{}},
 			{"token4", 0, []int{7, 14, 35}},
 		},
-	})
-	// doc1 = "token2 token4 token4 token2 token3 token4"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 1,
+	}, false)
+	// doc2 = "token2 token4 token4 token2 token3 token4"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0, 21}},
 			{"token3", 0, []int{28}},
 			{"token4", 0, []int{7, 14, 35}},
 		},
-	})
+	}, true)
 
-	utils.Expect(t, "0 ", indicesToString(&indexer, "label1"))
+	utils.Expect(t, "1 ", indicesToString(&indexer, "label1"))
 
-	utils.Expect(t, "[0 1 [21 28]] ",
+	utils.Expect(t, "[1 1 [21 28]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2", "token3"}, []string{"label1"}, nil, false)))
 }
 
@@ -285,25 +344,25 @@ func TestLookupWithBM25(t *testing.T) {
 			B:  1,
 		},
 	})
-	// doc0 = "token2 token4 token4 token2 token3 token4"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:       0,
+	// doc1 = "token2 token4 token4 token2 token3 token4"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId:       1,
 		TokenLength: 6,
 		Keywords: []types.KeywordIndex{
 			{"token2", 3, []int{0, 21}},
 			{"token3", 7, []int{28}},
 			{"token4", 15, []int{7, 14, 35}},
 		},
-	})
-	// doc0 = "token6 token7"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId:       1,
+	}, false)
+	// doc2 = "token6 token7"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId:       2,
 		TokenLength: 2,
 		Keywords: []types.KeywordIndex{
 			{"token6", 3, []int{0}},
 			{"token7", 15, []int{7}},
 		},
-	})
+	}, true)
 
 	outputs, _ := indexer.Lookup([]string{"token2", "token3", "token4"}, []string{}, nil, false)
 
@@ -314,59 +373,70 @@ func TestLookupWithBM25(t *testing.T) {
 func TestLookupWithinDocIds(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
-	// doc0 = "token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 			{"token3", 0, []int{7}},
 		},
-	})
-	// doc1 = "token1 token2 token3"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 1,
+	}, false)
+	// doc2 = "token1 token2 token3"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
 			{"token3", 0, []int{14}},
 		},
-	})
-	// doc2 = "token1 token2"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 2,
+	}, false)
+	// doc3 = "token1 token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 3,
 		Keywords: []types.KeywordIndex{
 			{"token1", 0, []int{0}},
 			{"token2", 0, []int{7}},
 		},
-	})
-	// doc3 = "token2"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 3,
+	}, false)
+	// doc4 = "token2"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 4,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0}},
 		},
-	})
+	}, true)
 
 	docIds := make(map[uint64]bool)
-	docIds[0] = true
-	docIds[2] = true
-	utils.Expect(t, "[2 0 [7]] [0 0 [0]] ",
+	docIds[1] = true
+	docIds[3] = true
+	utils.Expect(t, "[3 0 [7]] [1 0 [0]] ",
 		indexedDocsToString(indexer.Lookup([]string{"token2"}, []string{}, docIds, false)))
 }
 
 func TestLookupWithLocations(t *testing.T) {
 	var indexer Indexer
 	indexer.Init(types.IndexerInitOptions{IndexType: types.LocationsIndex})
-	// doc0 = "token2 token4 token4 token2 token3 token4"
-	indexer.AddDocument(&types.DocumentIndex{
-		DocId: 0,
+	// doc1 = "token2 token4 token4 token2 token3 token4"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 1,
 		Keywords: []types.KeywordIndex{
 			{"token2", 0, []int{0, 21}},
 			{"token3", 0, []int{28}},
 			{"token4", 0, []int{7, 14, 35}},
 		},
-	})
+	}, true)
+
+	// doc2 = "token2 token4 token4 token2 token3 token4"
+	indexer.AddDocumentToCache(&types.DocumentIndex{
+		DocId: 2,
+		Keywords: []types.KeywordIndex{
+			{"token3", 0, []int{0, 21}},
+			{"token5", 0, []int{28}},
+			{"token2", 0, []int{7, 14, 35}},
+		},
+	}, true)
 
+	indexer.RemoveDocumentToCache(2, true)
 	docs, _ := indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)
 	utils.Expect(t, "[[0 21] [28]]", docs[0].TokenLocations)
 }

+ 1 - 1
core/ranker_test.go

@@ -97,7 +97,7 @@ func TestRankWithCriteria(t *testing.T) {
 	utils.Expect(t, "[1 [25300 ]] [3 [17300 ]] ", scoredDocsToString(scoredDocs))
 }
 
-func TestRemoveDocument(t *testing.T) {
+func TestRemoveDoc(t *testing.T) {
 	var ranker Ranker
 	ranker.Init()
 	ranker.AddDoc(1, DummyScoringFields{

+ 5 - 4
core/test_utils.go

@@ -6,10 +6,11 @@ import (
 )
 
 func indicesToString(indexer *Indexer, token string) (output string) {
-	indices := indexer.tableLock.table[token]
-	for i := 0; i < indexer.getIndexLength(indices); i++ {
-		output += fmt.Sprintf("%d ",
-			indexer.getDocId(indices, i))
+	if indices, ok := indexer.tableLock.table[token]; ok {
+		for i := 0; i < indexer.getIndexLength(indices); i++ {
+			output += fmt.Sprintf("%d ",
+				indexer.getDocId(indices, i))
+		}
 	}
 	return
 }

+ 4 - 0
engine/counters.go

@@ -7,3 +7,7 @@ func (engine *Engine) NumTokenIndexAdded() uint64 {
 func (engine *Engine) NumDocumentsIndexed() uint64 {
 	return engine.numDocumentsIndexed
 }
+
+func (engine *Engine) NumDocumentsRemoved() uint64 {
+	return engine.numDocumentsRemoved
+}

+ 66 - 36
engine/engine.go

@@ -24,10 +24,14 @@ const (
 
 type Engine struct {
	// Counters tracking how many documents have been indexed, etc.
-	numDocumentsIndexed uint64
-	numIndexingRequests uint64
-	numTokenIndexAdded  uint64
-	numDocumentsStored  uint64
+	numDocumentsIndexed      uint64
+	numDocumentsRemoved      uint64
+	numDocumentsForceUpdated uint64
+	numIndexingRequests      uint64
+	numRemovingRequests      uint64
+	numForceUpdatingRequests uint64
+	numTokenIndexAdded       uint64
+	numDocumentsStored       uint64
 
	// Recorded initialization options
 	initOptions types.EngineInitOptions
@@ -40,10 +44,10 @@ type Engine struct {
 	dbs        []storage.Storage
 
	// Communication channels used to build the index
-	segmenterChannel           chan segmenterRequest
-	indexerAddDocumentChannels []chan indexerAddDocumentRequest
-	indexerRemoveDocChannels   []chan indexerRemoveDocRequest
-	rankerAddDocChannels       []chan rankerAddDocRequest
+	segmenterChannel         chan segmenterRequest
+	indexerAddDocChannels    []chan indexerAddDocumentRequest
+	indexerRemoveDocChannels []chan indexerRemoveDocRequest
+	rankerAddDocChannels     []chan rankerAddDocRequest
 
	// Communication channels used by the rankers
 	indexerLookupChannels   []chan indexerLookupRequest
@@ -86,20 +90,23 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
 
	// Initialize the segmenter channel
 	engine.segmenterChannel = make(
 		chan segmenterRequest, options.NumSegmenterThreads)
 
	// Initialize the indexer channels
-	engine.indexerAddDocumentChannels = make(
+	engine.indexerAddDocChannels = make(
 		[]chan indexerAddDocumentRequest, options.NumShards)
 	engine.indexerRemoveDocChannels = make(
 		[]chan indexerRemoveDocRequest, options.NumShards)
 	engine.indexerLookupChannels = make(
 		[]chan indexerLookupRequest, options.NumShards)
 	for shard := 0; shard < options.NumShards; shard++ {
-		engine.indexerAddDocumentChannels[shard] = make(
+		engine.indexerAddDocChannels[shard] = make(
 			chan indexerAddDocumentRequest,
 			options.IndexerBufferLength)
 		engine.indexerRemoveDocChannels[shard] = make(
 			chan indexerRemoveDocRequest,
 			options.IndexerBufferLength)
 		engine.indexerLookupChannels[shard] = make(
@@ -215,68 +222,74 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
// Add a document to the index
 //
// Input parameters:
-// 	docId	unique identifier of the document
-//	data	see the comments on DocumentIndexData
+//  docId	unique identifier of the document; docId == 0 marks an invalid document (used to force-flush the index), [1, +oo) are valid documents
+//  data	see the comments on DocumentIndexData
 //
// Notes:
//      1. This function is thread-safe; call it concurrently whenever possible to speed up indexing
-// 	2. This call is asynchronous, i.e. the document may not be in the index yet when the function returns,
+//      2. This call is asynchronous, i.e. the document may not be in the index yet when the function returns,
//         so an immediate Search may not find it. Call FlushIndex to force-flush the index.
-func (engine *Engine) IndexDocument(docId uint64, data types.DocumentIndexData) {
-	engine.internalIndexDocument(docId, data)
+func (engine *Engine) IndexDocument(docId uint64, data types.DocumentIndexData, forceUpdate bool) {
+	engine.internalIndexDocument(docId, data, forceUpdate)
 
 	hash := murmur.Murmur3([]byte(fmt.Sprint("%d", docId))) % uint32(engine.initOptions.PersistentStorageShards)
-	if engine.initOptions.UsePersistentStorage {
+	if engine.initOptions.UsePersistentStorage && docId != 0 {
 		engine.persistentStorageIndexDocumentChannels[hash] <- persistentStorageIndexDocumentRequest{docId: docId, data: data}
 	}
 }
 
-func (engine *Engine) internalIndexDocument(docId uint64, data types.DocumentIndexData) {
+func (engine *Engine) internalIndexDocument(
+	docId uint64, data types.DocumentIndexData, forceUpdate bool) {
 	if !engine.initialized {
 		log.Fatal("必须先初始化引擎")
 	}
 
-	atomic.AddUint64(&engine.numIndexingRequests, 1)
+	if docId != 0 {
+		atomic.AddUint64(&engine.numIndexingRequests, 1)
+	}
+	if forceUpdate {
+		atomic.AddUint64(&engine.numForceUpdatingRequests, 1)
+	}
 	hash := murmur.Murmur3([]byte(fmt.Sprint("%d%s", docId, data.Content)))
 	engine.segmenterChannel <- segmenterRequest{
-		docId: docId, hash: hash, data: data}
+		docId: docId, hash: hash, data: data, forceUpdate: forceUpdate}
 }
 
// Remove a document from the index
 //
// Input parameters:
-// 	docId	unique identifier of the document
+//  docId	unique identifier of the document; docId == 0 marks an invalid document (used to force-flush the index), [1, +oo) are valid documents
 //
-// Note: this function only removes the document from the ranker; the indexer is left unchanged.
-func (engine *Engine) RemoveDocument(docId uint64) {
+// Notes:
+//      1. This function is thread-safe; call it concurrently whenever possible to speed up removal
+//      2. This call is asynchronous, i.e. the document may not be removed from the index yet when the function returns,
+//         so an immediate Search may still find it. Call FlushIndex to force-flush the index.
+func (engine *Engine) RemoveDocument(docId uint64, forceUpdate bool) {
 	if !engine.initialized {
 		log.Fatal("必须先初始化引擎")
 	}
 
+	if docId != 0 {
+		atomic.AddUint64(&engine.numRemovingRequests, 1)
+	}
+	if forceUpdate {
+		atomic.AddUint64(&engine.numForceUpdatingRequests, 1)
+	}
 	for shard := 0; shard < engine.initOptions.NumShards; shard++ {
-		engine.indexerRemoveDocChannels[shard] <- indexerRemoveDocRequest{docId: docId}
+		engine.indexerRemoveDocChannels[shard] <- indexerRemoveDocRequest{docId: docId, forceUpdate: forceUpdate}
+		if docId == 0 {
+			continue
+		}
 		engine.rankerRemoveDocChannels[shard] <- rankerRemoveDocRequest{docId: docId}
 	}
 
-	if engine.initOptions.UsePersistentStorage {
+	if engine.initOptions.UsePersistentStorage && docId != 0 {
		// Remove from the database
 		hash := murmur.Murmur3([]byte(fmt.Sprint("%d", docId))) % uint32(engine.initOptions.PersistentStorageShards)
 		go engine.persistentStorageRemoveDocumentWorker(docId, hash)
 	}
 }
 
-// Block until all index additions are complete
-func (engine *Engine) FlushIndex() {
-	for {
-		runtime.Gosched()
-		if engine.numIndexingRequests == engine.numDocumentsIndexed &&
-			(!engine.initOptions.UsePersistentStorage ||
-				engine.numIndexingRequests == engine.numDocumentsStored) {
-			return
-		}
-	}
-}
-
// Find documents matching the search criteria; this function is thread-safe
 func (engine *Engine) Search(request types.SearchRequest) (output types.SearchResponse) {
 	if !engine.initialized {
@@ -397,6 +410,23 @@ func (engine *Engine) Search(request types.SearchRequest) (output types.SearchRe
 	return
 }
 
+// Block until all index additions and removals are complete
+func (engine *Engine) FlushIndex() {
+	// Force an update: requests arrive in the channels out of order, which can leave residue in the caches
+	engine.RemoveDocument(0, true)
+	engine.IndexDocument(0, types.DocumentIndexData{}, true)
+	for {
+		runtime.Gosched()
+		if engine.numIndexingRequests == engine.numDocumentsIndexed &&
+			engine.numRemovingRequests*uint64(engine.initOptions.NumShards) == engine.numDocumentsRemoved &&
+			engine.numForceUpdatingRequests*uint64(engine.initOptions.NumShards) ==
+				engine.numDocumentsForceUpdated && (!engine.initOptions.UsePersistentStorage ||
+			engine.numIndexingRequests == engine.numDocumentsStored) {
+			return
+		}
+	}
+}
+
// Close the engine
 func (engine *Engine) Close() {
 	engine.FlushIndex()
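At the engine level, the docId == 0 sentinel is what ties FlushIndex to the per-shard caches. A sketch of the intended calling pattern; it assumes the NotUsingSegmenter option and the Tokens fields of DocumentIndexData and SearchRequest (which the tests below also rely on), so that no dictionary file is needed:

package main

import (
	"fmt"

	"github.com/huichen/wukong/engine"
	"github.com/huichen/wukong/types"
)

func main() {
	var searcher engine.Engine
	searcher.Init(types.EngineInitOptions{NotUsingSegmenter: true})
	defer searcher.Close()

	// Asynchronous: the document may not be searchable yet on return.
	searcher.IndexDocument(1, types.DocumentIndexData{
		Tokens: []types.TokenData{{"token1", []int{0}}},
	}, false)

	// FlushIndex sends the docId == 0 sentinels (RemoveDocument(0, true)
	// and IndexDocument(0, ..., true)) and spins until every shard
	// reports the force update as applied.
	searcher.FlushIndex()

	response := searcher.Search(types.SearchRequest{Tokens: []string{"token1"}})
	fmt.Println(len(response.Docs)) // expect 1
}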

+ 41 - 39
engine/engine_test.go

@@ -14,31 +14,32 @@ type ScoringFields struct {
 }
 
 func AddDocs(engine *Engine) {
-	docId := uint64(0)
+	docId := uint64(1)
+	// forceUpdate is set to true throughout, to guarantee that every document is added to the index
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国有十三亿人口人口",
 		Fields:  ScoringFields{1, 2, 3},
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国人口",
 		Fields:  nil,
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "有人口",
 		Fields:  ScoringFields{2, 3, 1},
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "有十三亿人口",
 		Fields:  ScoringFields{2, 3, 3},
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国十三亿人口",
 		Fields:  ScoringFields{0, 9, 1},
-	})
+	}, true)
 
 	engine.FlushIndex()
 }
@@ -76,15 +77,15 @@ func TestEngineIndexDocument(t *testing.T) {
 	utils.Expect(t, "人口", outputs.Tokens[1])
 	utils.Expect(t, "3", len(outputs.Docs))
 
-	utils.Expect(t, "1", outputs.Docs[0].DocId)
+	utils.Expect(t, "2", outputs.Docs[0].DocId)
 	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
 	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)
 
-	utils.Expect(t, "4", outputs.Docs[1].DocId)
+	utils.Expect(t, "5", outputs.Docs[1].DocId)
 	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
 	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
 
-	utils.Expect(t, "0", outputs.Docs[2].DocId)
+	utils.Expect(t, "1", outputs.Docs[2].DocId)
 	utils.Expect(t, "76", int(outputs.Docs[2].Scores[0]*1000))
 	utils.Expect(t, "[0 18]", outputs.Docs[2].TokenSnippetLocations)
 }
@@ -109,9 +110,9 @@ func TestReverseOrder(t *testing.T) {
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "3", len(outputs.Docs))
 
-	utils.Expect(t, "0", outputs.Docs[0].DocId)
-	utils.Expect(t, "4", outputs.Docs[1].DocId)
-	utils.Expect(t, "1", outputs.Docs[2].DocId)
+	utils.Expect(t, "1", outputs.Docs[0].DocId)
+	utils.Expect(t, "5", outputs.Docs[1].DocId)
+	utils.Expect(t, "2", outputs.Docs[2].DocId)
 }
 
 func TestOffsetAndMaxOutputs(t *testing.T) {
@@ -134,8 +135,8 @@ func TestOffsetAndMaxOutputs(t *testing.T) {
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "4", outputs.Docs[0].DocId)
-	utils.Expect(t, "1", outputs.Docs[1].DocId)
+	utils.Expect(t, "5", outputs.Docs[0].DocId)
+	utils.Expect(t, "2", outputs.Docs[1].DocId)
 }
 
 type TestScoringCriteria struct {
@@ -167,10 +168,10 @@ func TestSearchWithCriteria(t *testing.T) {
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "0", outputs.Docs[0].DocId)
+	utils.Expect(t, "1", outputs.Docs[0].DocId)
 	utils.Expect(t, "18000", int(outputs.Docs[0].Scores[0]*1000))
 
-	utils.Expect(t, "4", outputs.Docs[1].DocId)
+	utils.Expect(t, "5", outputs.Docs[1].DocId)
 	utils.Expect(t, "9000", int(outputs.Docs[1].Scores[0]*1000))
 }
 
@@ -188,10 +189,10 @@ func TestCompactIndex(t *testing.T) {
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "4", outputs.Docs[0].DocId)
+	utils.Expect(t, "5", outputs.Docs[0].DocId)
 	utils.Expect(t, "9000", int(outputs.Docs[0].Scores[0]*1000))
 
-	utils.Expect(t, "0", outputs.Docs[1].DocId)
+	utils.Expect(t, "1", outputs.Docs[1].DocId)
 	utils.Expect(t, "6000", int(outputs.Docs[1].Scores[0]*1000))
 }
 
@@ -223,11 +224,11 @@ func TestFrequenciesIndex(t *testing.T) {
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "4", outputs.Docs[0].DocId)
-	utils.Expect(t, "2311", int(outputs.Docs[0].Scores[0]*1000))
+	utils.Expect(t, "5", outputs.Docs[0].DocId)
+	utils.Expect(t, "2349", int(outputs.Docs[0].Scores[0]*1000))
 
-	utils.Expect(t, "0", outputs.Docs[1].DocId)
-	utils.Expect(t, "2211", int(outputs.Docs[1].Scores[0]*1000))
+	utils.Expect(t, "1", outputs.Docs[1].DocId)
+	utils.Expect(t, "2320", int(outputs.Docs[1].Scores[0]*1000))
 }
 
 func TestRemoveDocument(t *testing.T) {
@@ -240,12 +241,12 @@ func TestRemoveDocument(t *testing.T) {
 	})
 
 	AddDocs(&engine)
-	engine.RemoveDocument(4)
+	engine.RemoveDocument(5, true)
 
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "1", len(outputs.Docs))
 
-	utils.Expect(t, "0", outputs.Docs[0].DocId)
+	utils.Expect(t, "1", outputs.Docs[0].DocId)
 	utils.Expect(t, "6000", int(outputs.Docs[0].Scores[0]*1000))
 }
 
@@ -263,7 +264,7 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 		},
 	})
 
-	docId := uint64(0)
+	docId := uint64(1)
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "",
 		Tokens: []types.TokenData{
@@ -271,7 +272,7 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 			{"人口", []int{18, 24}},
 		},
 		Fields: ScoringFields{1, 2, 3},
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "",
@@ -280,12 +281,12 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 			{"人口", []int{6}},
 		},
 		Fields: ScoringFields{1, 2, 3},
-	})
+	}, true)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国十三亿人口",
 		Fields:  ScoringFields{0, 9, 1},
-	})
+	}, true)
 
 	engine.FlushIndex()
 
@@ -295,15 +296,15 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 	utils.Expect(t, "人口", outputs.Tokens[1])
 	utils.Expect(t, "3", len(outputs.Docs))
 
-	utils.Expect(t, "1", outputs.Docs[0].DocId)
+	utils.Expect(t, "2", outputs.Docs[0].DocId)
 	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
 	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)
 
-	utils.Expect(t, "2", outputs.Docs[1].DocId)
+	utils.Expect(t, "3", outputs.Docs[1].DocId)
 	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
 	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
 
-	utils.Expect(t, "0", outputs.Docs[2].DocId)
+	utils.Expect(t, "1", outputs.Docs[2].DocId)
 	utils.Expect(t, "76", int(outputs.Docs[2].Scores[0]*1000))
 	utils.Expect(t, "[0 18]", outputs.Docs[2].TokenSnippetLocations)
 }
@@ -326,7 +327,7 @@ func TestEngineIndexDocumentWithPersistentStorage(t *testing.T) {
 		PersistentStorageShards: 2,
 	})
 	AddDocs(&engine)
-	engine.RemoveDocument(4)
+	engine.RemoveDocument(5, true)
 	engine.Close()
 
 	var engine1 Engine
@@ -344,6 +345,7 @@ func TestEngineIndexDocumentWithPersistentStorage(t *testing.T) {
 		PersistentStorageFolder: "wukong.persistent",
 		PersistentStorageShards: 2,
 	})
+	engine1.FlushIndex()
 
 	outputs := engine1.Search(types.SearchRequest{Text: "中国人口"})
 	utils.Expect(t, "2", len(outputs.Tokens))
@@ -351,11 +353,11 @@ func TestEngineIndexDocumentWithPersistentStorage(t *testing.T) {
 	utils.Expect(t, "人口", outputs.Tokens[1])
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "1", outputs.Docs[0].DocId)
+	utils.Expect(t, "2", outputs.Docs[0].DocId)
 	utils.Expect(t, "1000", int(outputs.Docs[0].Scores[0]*1000))
 	utils.Expect(t, "[0 6]", outputs.Docs[0].TokenSnippetLocations)
 
-	utils.Expect(t, "0", outputs.Docs[1].DocId)
+	utils.Expect(t, "1", outputs.Docs[1].DocId)
 	utils.Expect(t, "76", int(outputs.Docs[1].Scores[0]*1000))
 	utils.Expect(t, "[0 18]", outputs.Docs[1].TokenSnippetLocations)
 
@@ -379,7 +381,7 @@ func TestCountDocsOnly(t *testing.T) {
 	})
 
 	AddDocs(&engine)
-	engine.RemoveDocument(4)
+	engine.RemoveDocument(5, true)
 
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口", CountDocsOnly: true})
 	utils.Expect(t, "0", len(outputs.Docs))
@@ -405,8 +407,8 @@ func TestSearchWithin(t *testing.T) {
 	AddDocs(&engine)
 
 	docIds := make(map[uint64]bool)
-	docIds[4] = true
-	docIds[0] = true
+	docIds[5] = true
+	docIds[1] = true
 	outputs := engine.Search(types.SearchRequest{
 		Text:   "中国人口",
 		DocIds: docIds,
@@ -416,11 +418,11 @@ func TestSearchWithin(t *testing.T) {
 	utils.Expect(t, "人口", outputs.Tokens[1])
 	utils.Expect(t, "2", len(outputs.Docs))
 
-	utils.Expect(t, "0", outputs.Docs[0].DocId)
+	utils.Expect(t, "1", outputs.Docs[0].DocId)
 	utils.Expect(t, "76", int(outputs.Docs[0].Scores[0]*1000))
 	utils.Expect(t, "[0 18]", outputs.Docs[0].TokenSnippetLocations)
 
-	utils.Expect(t, "4", outputs.Docs[1].DocId)
+	utils.Expect(t, "5", outputs.Docs[1].DocId)
 	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
 	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
 }

+ 27 - 14
engine/indexer_worker.go

@@ -6,7 +6,8 @@ import (
 )
 
 type indexerAddDocumentRequest struct {
-	document *types.DocumentIndex
+	document    *types.DocumentIndex
+	forceUpdate bool
 }
 
 type indexerLookupRequest struct {
@@ -20,16 +21,35 @@ type indexerLookupRequest struct {
 }
 
 type indexerRemoveDocRequest struct {
-	docId uint64
+	docId       uint64
+	forceUpdate bool
 }
 
 func (engine *Engine) indexerAddDocumentWorker(shard int) {
 	for {
-		request := <-engine.indexerAddDocumentChannels[shard]
-		engine.indexers[shard].AddDocument(request.document)
-		atomic.AddUint64(&engine.numTokenIndexAdded,
-			uint64(len(request.document.Keywords)))
-		atomic.AddUint64(&engine.numDocumentsIndexed, 1)
+		request := <-engine.indexerAddDocChannels[shard]
+		engine.indexers[shard].AddDocumentToCache(request.document, request.forceUpdate)
+		if request.document != nil {
+			atomic.AddUint64(&engine.numTokenIndexAdded,
+				uint64(len(request.document.Keywords)))
+			atomic.AddUint64(&engine.numDocumentsIndexed, 1)
+		}
+		if request.forceUpdate {
+			atomic.AddUint64(&engine.numDocumentsForceUpdated, 1)
+		}
+	}
+}
+
+func (engine *Engine) indexerRemoveDocWorker(shard int) {
+	for {
+		request := <-engine.indexerRemoveDocChannels[shard]
+		engine.indexers[shard].RemoveDocumentToCache(request.docId, request.forceUpdate)
+		if request.docId != 0 {
+			atomic.AddUint64(&engine.numDocumentsRemoved, 1)
+		}
+		if request.forceUpdate {
+			atomic.AddUint64(&engine.numDocumentsForceUpdated, 1)
+		}
 	}
 }
 
@@ -79,10 +99,3 @@ func (engine *Engine) indexerLookupWorker(shard int) {
 		engine.rankerRankChannels[shard] <- rankerRequest
 	}
 }
-
-func (engine *Engine) indexerRemoveDocWorker(shard int) {
-	for {
-		request := <-engine.indexerRemoveDocChannels[shard]
-		engine.indexers[shard].RemoveDoc(request.docId)
-	}
-}

+ 1 - 1
engine/persistent_storage_worker.go

@@ -58,7 +58,7 @@ func (engine *Engine) persistentStorageInitWorker(shard int) {
 		err := dec.Decode(&data)
 		if err == nil {
			// Add to the index
-			engine.internalIndexDocument(docId, data)
+			engine.internalIndexDocument(docId, data, false)
 		}
 		return nil
 	})

+ 24 - 5
engine/segmenter_worker.go

@@ -5,16 +5,25 @@ import (
 )
 
 type segmenterRequest struct {
-	docId uint64
-	hash  uint32
-	data  types.DocumentIndexData
+	docId       uint64
+	hash        uint32
+	data        types.DocumentIndexData
+	forceUpdate bool
 }
 
 func (engine *Engine) segmenterWorker() {
 	for {
 		request := <-engine.segmenterChannel
-		shard := engine.getShard(request.hash)
+		if request.docId == 0 {
+			if request.forceUpdate {
+				for i := 0; i < engine.initOptions.NumShards; i++ {
+					engine.indexerAddDocChannels[i] <- indexerAddDocumentRequest{forceUpdate: true}
+				}
+			}
+			continue
+		}
 
+		shard := engine.getShard(request.hash)
 		tokensMap := make(map[string][]int)
 		numTokens := 0
 		if !engine.initOptions.NotUsingSegmenter && request.data.Content != "" {
@@ -54,6 +63,7 @@ func (engine *Engine) segmenterWorker() {
 				TokenLength: float32(numTokens),
 				Keywords:    make([]types.KeywordIndex, len(tokensMap)),
 			},
+			forceUpdate: request.forceUpdate,
 		}
 		iTokens := 0
 		for k, v := range tokensMap {
@@ -64,7 +74,16 @@ func (engine *Engine) segmenterWorker() {
 				Starts:    v}
 			iTokens++
 		}
-		engine.indexerAddDocumentChannels[shard] <- indexerRequest
+
+		engine.indexerAddDocChannels[shard] <- indexerRequest
+		if request.forceUpdate {
+			for i := 0; i < engine.initOptions.NumShards; i++ {
+				if i == shard {
+					continue
+				}
+				engine.indexerAddDocChannels[i] <- indexerAddDocumentRequest{forceUpdate: true}
+			}
+		}
 		rankerRequest := rankerAddDocRequest{
 			docId: request.docId, fields: request.data.Fields}
 		engine.rankerAddDocChannels[shard] <- rankerRequest

+ 26 - 11
examples/benchmark.go

@@ -39,6 +39,7 @@ var (
	cpuprofile                = flag.String("cpuprofile", "", "CPU profile file")
	memprofile                = flag.String("memprofile", "", "memory profile file")
	num_repeat_text           = flag.Int("num_repeat_text", 10, "how many times to re-add the text")
+	num_delete_docs           = flag.Int("num_delete_docs", 1000, "number of documents to delete in the test")
	index_type                = flag.Int("index_type", types.DocIdsIndex, "index type")
	use_persistent            = flag.Bool("use_persistent", false, "whether to use persistent storage")
	persistent_storage_folder = flag.String("persistent_storage_folder", "benchmark.persistent", "directory where the persistent storage database is kept")
@@ -123,7 +124,7 @@ func main() {
 	for i := 0; i < *num_repeat_text; i++ {
 		for _, line := range lines {
 			searcher.IndexDocument(docId, types.DocumentIndexData{
-				Content: line})
+				Content: line}, false)
 			docId++
 			if docId-docId/1000000*1000000 == 0 {
 				log.Printf("已索引%d百万文档", docId/1000000)
@@ -150,28 +151,39 @@ func main() {
 		defer f.Close()
 	}
 
-	// Record the time
+	// Record the time and measure index removal speed
 	t2 := time.Now()
+	for i := 1; i <= *num_delete_docs; i++ {
+		searcher.RemoveDocument(uint64(i), false)
+	}
+	searcher.FlushIndex()
+	t3 := time.Now()
+	log.Printf("删除 %d 条索引花费时间 %v", *num_delete_docs, t3.Sub(t2))
 
 	done := make(chan bool)
+	recordResponseLength := make(map[string]int)
 	for iThread := 0; iThread < numQueryThreads; iThread++ {
-		go search(done)
+		go search(done, recordResponseLength)
 	}
 	for iThread := 0; iThread < numQueryThreads; iThread++ {
 		<-done
 	}
+	// Check the search output; docIds differ across cases, so only the totals are verified
+	for keyword, count := range recordResponseLength {
+		log.Printf("关键词 [%s] 共搜索到 %d 个相关文档", keyword, count)
+	}
 
	// Record the time and compute the search speed
-	t3 := time.Now()
+	t4 := time.Now()
 	log.Printf("搜索平均响应时间 %v 毫秒",
-		t3.Sub(t2).Seconds()*1000/float64(numRepeatQuery*len(searchQueries)))
+		t4.Sub(t3).Seconds()*1000/float64(numRepeatQuery*len(searchQueries)))
 	log.Printf("搜索吞吐量每秒 %v 次查询",
 		float64(numRepeatQuery*numQueryThreads*len(searchQueries))/
-			t3.Sub(t2).Seconds())
+			t4.Sub(t3).Seconds())
 
 	if *use_persistent {
 		searcher.Close()
-		t4 := time.Now()
+		t5 := time.Now()
 		searcher1 := engine.Engine{}
 		searcher1.Init(types.EngineInitOptions{
 			SegmenterDictionaries: *dictionaries,
@@ -186,8 +198,8 @@ func main() {
 			PersistentStorageShards: *persistent_storage_shards,
 		})
 		defer searcher1.Close()
-		t5 := time.Now()
-		t := t5.Sub(t4).Seconds() - tEndInit.Sub(tBeginInit).Seconds()
+		t6 := time.Now()
+		t := t6.Sub(t5).Seconds() - tEndInit.Sub(tBeginInit).Seconds()
 		log.Print("从持久存储加入的索引总数", searcher1.NumTokenIndexAdded())
 		log.Printf("从持久存储建立索引花费时间 %v 秒", t)
 		log.Printf("从持久存储建立索引速度每秒添加 %f 百万个索引",
@@ -197,10 +209,13 @@ func main() {
 	//os.RemoveAll(*persistent_storage_folder)
 }
 
-func search(ch chan bool) {
+func search(ch chan bool, record map[string]int) {
 	for i := 0; i < numRepeatQuery; i++ {
 		for _, query := range searchQueries {
-			searcher.Search(types.SearchRequest{Text: query})
+			output := searcher.Search(types.SearchRequest{Text: query})
+			if _, found := record[query]; !found {
+				record[query] = len(output.Docs)
+			}
 		}
 	}
 	ch <- true

+ 26 - 0
types/index.go

@@ -42,3 +42,29 @@ type IndexedDocument struct {
 	// 仅当索引类型为LocationsIndex时返回有效值。
 	TokenLocations [][]int
 }
+
+// Convenience type for adding document indexes in batch
+type DocumentsIndex []*DocumentIndex
+
+func (docs DocumentsIndex) Len() int {
+	return len(docs)
+}
+func (docs DocumentsIndex) Swap(i, j int) {
+	docs[i], docs[j] = docs[j], docs[i]
+}
+func (docs DocumentsIndex) Less(i, j int) bool {
+	return docs[i].DocId < docs[j].DocId
+}
+
+// Convenience type for removing document indexes in batch
+type DocumentsId []uint64
+
+func (docs DocumentsId) Len() int {
+	return len(docs)
+}
+func (docs DocumentsId) Swap(i, j int) {
+	docs[i], docs[j] = docs[j], docs[i]
+}
+func (docs DocumentsId) Less(i, j int) bool {
+	return docs[i] < docs[j]
+}
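Both types exist to satisfy sort.Interface, so the indexer can order its caches before the batch walks over the inverted index. A small illustration:

package main

import (
	"fmt"
	"sort"

	"github.com/huichen/wukong/types"
)

func main() {
	ids := types.DocumentsId{9, 1, 7, 2}
	sort.Sort(ids) // uses the Len/Swap/Less defined above
	fmt.Println(ids) // [1 2 7 9]
}

For DocumentsIndex the ordering must also be stable (sort.Stable), so that among duplicates of the same DocId the last-added entry is the one that survives deduplication in AddDocuments.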

+ 12 - 0
types/indexer_init_options.go

@@ -11,6 +11,9 @@ const (
	// Store the exact byte locations where a keyword appears in the document (possibly several)
	// If you want keyword proximity data, you must use a LocationsIndex-type index
 	LocationsIndex = 2
+
+	// Default cache size, in documents, for insertions into the index table
+	defaultDocCacheSize = 300000
 )
 
// Indexer initialization options
@@ -18,6 +21,9 @@ type IndexerInitOptions struct {
	// Type of the index table; see the constants above
 	IndexType int
 
+	// Cache size, in documents, for pending insertions into the index table
+	DocCacheSize int
+
	// BM25 parameters
 	BM25Parameters *BM25Parameters
 }
@@ -28,3 +34,9 @@ type BM25Parameters struct {
 	K1 float32
 	B  float32
 }
+
+func (options *IndexerInitOptions) Init() {
+	if options.DocCacheSize == 0 {
+		options.DocCacheSize = defaultDocCacheSize
+	}
+}
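Init is what lets callers leave DocCacheSize at zero. A quick sketch of the defaulting behavior (the printed 300000 is defaultDocCacheSize from this diff):

package main

import (
	"fmt"

	"github.com/huichen/wukong/types"
)

func main() {
	opts := types.IndexerInitOptions{IndexType: types.LocationsIndex}
	opts.Init() // DocCacheSize was 0, so the default is filled in
	fmt.Println(opts.DocCacheSize) // 300000

	small := types.IndexerInitOptions{DocCacheSize: 1024}
	small.Init() // an explicit value is left untouched
	fmt.Println(small.DocCacheSize) // 1024
}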