9 年之前 · cd403ec95d
--- a/README.md
+++ b/README.md
@@ -45,9 +45,9 @@ func main() {
 
				 	defer searcher.Close()
			
 
				 
			
 
				 	// 将文档加入索引
			
 
				-	searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"})
			
 
				-	searcher.IndexDocument(1, types.DocumentIndexData{Content: "百度宣布拟全资收购91无线业务"})
			
 
				-	searcher.IndexDocument(2, types.DocumentIndexData{Content: "百度是中国最大的搜索引擎"})
			
 
				+	searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}, true)
			
 
				+	searcher.IndexDocument(1, types.DocumentIndexData{Content: "百度宣布拟全资收购91无线业务"}, true)
			
 
				+	searcher.IndexDocument(2, types.DocumentIndexData{Content: "百度是中国最大的搜索引擎"}, true)
			
 
				 
			
 
				 	// 等待索引刷新完毕
			
 
				 	searcher.FlushIndex()
			
--- a/core/indexer_test.go
+++ b/core/indexer_test.go
@@ -3,7 +3,6 @@ package core
 
				 import (
			
 
				 	"testing"
			
 
				 
			
 
				-	"github.com/huichen/wukong/engine"
			
 
				 	"github.com/huichen/wukong/types"
			
 
				 	"github.com/huichen/wukong/utils"
			
 
				 )
			
@@ -442,61 +441,3 @@ func TestLookupWithLocations(t *testing.T) {
 
				 	docs, _ := indexer.Lookup([]string{"token2", "token3"}, []string{}, nil, false)
			
 
				 	utils.Expect(t, "[[0 21] [28]]", docs[0].TokenLocations)
			
 
				 }
			
 
				-
			
 
				-func TestLookupWithLocations1(t *testing.T) {
			
 
				-
			
 
				-	type Data struct {
			
 
				-		Id      int
			
 
				-		Content string
			
 
				-		Labels  []string
			
 
				-	}
			
 
				-
			
 
				-	datas := make([]Data, 0)
			
 
				-
			
 
				-	data0 := Data{Id: 0, Content: "此次百度收购将成中国互联网最大并购", Labels: []string{"百度", "中国"}}
			
 
				-	datas = append(datas, data0)
			
 
				-
			
 
				-	data1 := Data{Id: 1, Content: "百度宣布拟全资收购91无线业务", Labels: []string{"百度"}}
			
 
				-	datas = append(datas, data1)
			
 
				-
			
 
				-	data2 := Data{Id: 2, Content: "百度是中国最大的搜索引擎", Labels: []string{"百度"}}
			
 
				-	datas = append(datas, data2)
			
 
				-
			
 
				-	data3 := Data{Id: 3, Content: "百度在研制无人汽车", Labels: []string{"百度"}}
			
 
				-	datas = append(datas, data3)
			
 
				-
			
 
				-	data4 := Data{Id: 4, Content: "BAT是中国互联网三巨头", Labels: []string{"百度"}}
			
 
				-	datas = append(datas, data4)
			
 
				-
			
 
				-	// 初始化
			
 
				-	searcher_locations := engine.Engine{}
			
 
				-	searcher_locations.Init(types.EngineInitOptions{
			
 
				-		SegmenterDictionaries: "../data/dictionary.txt",
			
 
				-		IndexerInitOptions: &types.IndexerInitOptions{
			
 
				-			IndexType: types.LocationsIndex,
			
 
				-		},
			
 
				-	})
			
 
				-	defer searcher_locations.Close()
			
 
				-	for _, data := range datas {
			
 
				-		searcher_locations.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels})
			
 
				-	}
			
 
				-	searcher_locations.FlushIndex()
			
 
				-	res_locations := searcher_locations.Search(types.SearchRequest{Text: "百度"})
			
 
				-
			
 
				-	searcher_docids := engine.Engine{}
			
 
				-	searcher_docids.Init(types.EngineInitOptions{
			
 
				-		SegmenterDictionaries: "../data/dictionary.txt",
			
 
				-		IndexerInitOptions: &types.IndexerInitOptions{
			
 
				-			IndexType: types.DocIdsIndex,
			
 
				-		},
			
 
				-	})
			
 
				-	defer searcher_docids.Close()
			
 
				-	for _, data := range datas {
			
 
				-		searcher_docids.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels})
			
 
				-	}
			
 
				-	searcher_docids.FlushIndex()
			
 
				-	res_docids := searcher_docids.Search(types.SearchRequest{Text: "百度"})
			
 
				-	if res_docids.NumDocs != res_locations.NumDocs {
			
 
				-		t.Errorf("期待的搜索结果个数=\"%d\", 实际=\"%d\"", res_docids.NumDocs, res_locations.NumDocs)
			
 
				-	}
			
 
				-}
			
--- a/engine/engine.go
+++ b/engine/engine.go
@@ -222,8 +222,9 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
 
				 // 将文档加入索引
			
 
				 //
			
 
				 // 输入参数：
			
 
				-//  docId	标识文档编号，必须唯一，docId == 0 表示非法文档（用于强制刷新索引），[1, +oo) 表示合法文档
			
 
				-//  data	见DocumentIndexData注释
			
 
				+//  docId	      标识文档编号，必须唯一，docId == 0 表示非法文档（用于强制刷新索引），[1, +oo) 表示合法文档
			
 
				+//  data	      见DocumentIndexData注释
			
 
				+//  forceUpdate 是否强制刷新 cache，如果设为 true，则尽快添加到索引，否则等待 cache 满之后一次全量添加
			
 
				 //
			
 
				 // 注意：
			
 
				 //      1. 这个函数是线程安全的，请尽可能并发调用以提高索引速度
			
@@ -258,7 +259,8 @@ func (engine *Engine) internalIndexDocument(
 
				 // 将文档从索引中删除
			
 
				 //
			
 
				 // 输入参数：
			
 
				-//  docId	标识文档编号，必须唯一，docId == 0 表示非法文档（用于强制刷新索引），[1, +oo) 表示合法文档
			
 
				+//  docId	      标识文档编号，必须唯一，docId == 0 表示非法文档（用于强制刷新索引），[1, +oo) 表示合法文档
			
 
				+//  forceUpdate 是否强制刷新 cache，如果设为 true，则尽快删除索引，否则等待 cache 满之后一次全量删除
			
 
				 //
			
 
				 // 注意：
			
 
				 //      1. 这个函数是线程安全的，请尽可能并发调用以提高索引速度
			
--- a/engine/engine_test.go
+++ b/engine/engine_test.go
@@ -426,3 +426,61 @@ func TestSearchWithin(t *testing.T) {
 
				 	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
			
 
				 	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
			
 
				 }
			
 
				+
			
 
				+func TestLookupWithLocations1(t *testing.T) {
			
 
				+
			
 
				+	type Data struct {
			
 
				+		Id      int
			
 
				+		Content string
			
 
				+		Labels  []string
			
 
				+	}
			
 
				+
			
 
				+	datas := make([]Data, 0)
			
 
				+
			
 
				+	data0 := Data{Id: 0, Content: "此次百度收购将成中国互联网最大并购", Labels: []string{"百度", "中国"}}
			
 
				+	datas = append(datas, data0)
			
 
				+
			
 
				+	data1 := Data{Id: 1, Content: "百度宣布拟全资收购91无线业务", Labels: []string{"百度"}}
			
 
				+	datas = append(datas, data1)
			
 
				+
			
 
				+	data2 := Data{Id: 2, Content: "百度是中国最大的搜索引擎", Labels: []string{"百度"}}
			
 
				+	datas = append(datas, data2)
			
 
				+
			
 
				+	data3 := Data{Id: 3, Content: "百度在研制无人汽车", Labels: []string{"百度"}}
			
 
				+	datas = append(datas, data3)
			
 
				+
			
 
				+	data4 := Data{Id: 4, Content: "BAT是中国互联网三巨头", Labels: []string{"百度"}}
			
 
				+	datas = append(datas, data4)
			
 
				+
			
 
				+	// 初始化
			
 
				+	searcher_locations := Engine{}
			
 
				+	searcher_locations.Init(types.EngineInitOptions{
			
 
				+		SegmenterDictionaries: "../data/dictionary.txt",
			
 
				+		IndexerInitOptions: &types.IndexerInitOptions{
			
 
				+			IndexType: types.LocationsIndex,
			
 
				+		},
			
 
				+	})
			
 
				+	defer searcher_locations.Close()
			
 
				+	for _, data := range datas {
			
 
				+		searcher_locations.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels}, true)
			
 
				+	}
			
 
				+	searcher_locations.FlushIndex()
			
 
				+	res_locations := searcher_locations.Search(types.SearchRequest{Text: "百度"})
			
 
				+
			
 
				+	searcher_docids := Engine{}
			
 
				+	searcher_docids.Init(types.EngineInitOptions{
			
 
				+		SegmenterDictionaries: "../data/dictionary.txt",
			
 
				+		IndexerInitOptions: &types.IndexerInitOptions{
			
 
				+			IndexType: types.DocIdsIndex,
			
 
				+		},
			
 
				+	})
			
 
				+	defer searcher_docids.Close()
			
 
				+	for _, data := range datas {
			
 
				+		searcher_docids.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels}, true)
			
 
				+	}
			
 
				+	searcher_docids.FlushIndex()
			
 
				+	res_docids := searcher_docids.Search(types.SearchRequest{Text: "百度"})
			
 
				+	if res_docids.NumDocs != res_locations.NumDocs {
			
 
				+		t.Errorf("期待的搜索结果个数=\"%d\", 实际=\"%d\"", res_docids.NumDocs, res_locations.NumDocs)
			
 
				+	}
			
 
				+}