Browse Source

DocIds从数组变为map

geili 10 years ago
parent
commit
f14937a288
5 changed files with 46 additions and 12 deletions
  1. 2 2
      core/indexer.go
  2. 1 1
      core/indexer_test.go
  3. 38 0
      engine/engine_test.go
  4. 3 7
      engine/indexer_worker.go
  5. 2 2
      types/search_request.go

+ 2 - 2
core/indexer.go

@@ -128,7 +128,7 @@ func (indexer *Indexer) AddDocument(document *types.DocumentIndex) {
 // 查找包含全部搜索键(AND操作)的文档
 // 当docIds不为nil时仅从docIds指定的文档中查找
 func (indexer *Indexer) Lookup(
-	tokens []string, labels []string, docIds *map[uint64]bool) (docs []types.IndexedDocument) {
+	tokens []string, labels []string, docIds map[uint64]bool) (docs []types.IndexedDocument) {
 	if indexer.initialized == false {
 		log.Fatal("索引器尚未初始化")
 	}
@@ -174,7 +174,7 @@ func (indexer *Indexer) Lookup(
 		baseDocId := indexer.getDocId(table[0], indexPointers[0])
 
 		if docIds != nil {
-			_, found := (*docIds)[baseDocId]
+			_, found := docIds[baseDocId]
 			if !found {
 				continue
 			}

+ 1 - 1
core/indexer_test.go

@@ -351,7 +351,7 @@ func TestLookupWithinDocIds(t *testing.T) {
 	docIds[0] = true
 	docIds[2] = true
 	utils.Expect(t, "[2 0 [7]] [0 0 [0]] ",
-		indexedDocsToString(indexer.Lookup([]string{"token2"}, []string{}, &docIds)))
+		indexedDocsToString(indexer.Lookup([]string{"token2"}, []string{}, docIds)))
 }
 
 func TestLookupWithLocations(t *testing.T) {

+ 38 - 0
engine/engine_test.go

@@ -386,3 +386,41 @@ func TestCountDocsOnly(t *testing.T) {
 	utils.Expect(t, "2", len(outputs.Tokens))
 	utils.Expect(t, "2", outputs.NumDocs)
 }
+
+func TestSearchWithin(t *testing.T) {
+	var engine Engine
+	engine.Init(types.EngineInitOptions{
+		SegmenterDictionaries: "../testdata/test_dict.txt",
+		DefaultRankOptions: &types.RankOptions{
+			ReverseOrder:    true,
+			OutputOffset:    0,
+			MaxOutputs:      10,
+			ScoringCriteria: &RankByTokenProximity{},
+		},
+		IndexerInitOptions: &types.IndexerInitOptions{
+			IndexType: types.LocationsIndex,
+		},
+	})
+
+	AddDocs(&engine)
+
+	docIds := make(map[uint64]bool)
+	docIds[4] = true
+	docIds[0] = true
+	outputs := engine.Search(types.SearchRequest{
+		Text:   "中国人口",
+		DocIds: docIds,
+	})
+	utils.Expect(t, "2", len(outputs.Tokens))
+	utils.Expect(t, "中国", outputs.Tokens[0])
+	utils.Expect(t, "人口", outputs.Tokens[1])
+	utils.Expect(t, "2", len(outputs.Docs))
+
+	utils.Expect(t, "0", outputs.Docs[0].DocId)
+	utils.Expect(t, "76", int(outputs.Docs[0].Scores[0]*1000))
+	utils.Expect(t, "[0 18]", outputs.Docs[0].TokenSnippetLocations)
+
+	utils.Expect(t, "4", outputs.Docs[1].DocId)
+	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
+	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
+}

+ 3 - 7
engine/indexer_worker.go

@@ -13,7 +13,7 @@ type indexerLookupRequest struct {
 	countDocsOnly       bool
 	tokens              []string
 	labels              []string
-	docIds              []uint64
+	docIds              map[uint64]bool
 	options             types.RankOptions
 	rankerReturnChannel chan rankerReturnRequest
 }
@@ -33,14 +33,10 @@ func (engine *Engine) indexerLookupWorker(shard int) {
 		request := <-engine.indexerLookupChannels[shard]
 
 		var docs []types.IndexedDocument
-		if len(request.docIds) == 0 {
+		if request.docIds == nil {
 			docs = engine.indexers[shard].Lookup(request.tokens, request.labels, nil)
 		} else {
-			docIds := make(map[uint64]bool)
-			for _, ids := range request.docIds {
-				docIds[ids] = true
-			}
-			docs = engine.indexers[shard].Lookup(request.tokens, request.labels, &docIds)
+			docs = engine.indexers[shard].Lookup(request.tokens, request.labels, request.docIds)
 		}
 
 		if len(docs) == 0 {

+ 2 - 2
types/search_request.go

@@ -12,8 +12,8 @@ type SearchRequest struct {
 	// 文档标签(必须是UTF-8格式),标签不存在文档文本中,但也属于搜索键的一种
 	Labels []string
 
-	// 当不为空时,仅从这些文档中搜索
-	DocIds []uint64
+	// 当不为nil时,仅从这些DocIds包含的键中搜索(忽略值);注意非nil的空map不会匹配任何文档
+	DocIds map[uint64]bool
 
 	// 排序选项
 	RankOptions *RankOptions