Browse Source

1. 删除 examples/enjoy_wukong.go

2. 整理 engine/engine_test.go
Zacky Su 9 years ago
parent
commit
e6d034149c
2 changed files with 62 additions and 132 deletions
  1. 62 65
      engine/engine_test.go
  2. 0 67
      examples/enjoy_wukong.go

+ 62 - 65
engine/engine_test.go

@@ -15,7 +15,6 @@ type ScoringFields struct {
 
 func AddDocs(engine *Engine) {
 	docId := uint64(1)
-	// 因为需要保证文档全部被加入到索引中,所以 forceUpdate 全部设置成 true
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国有十三亿人口人口",
 		Fields:  ScoringFields{1, 2, 3},
@@ -43,6 +42,35 @@ func AddDocs(engine *Engine) {
 	engine.FlushIndex()
 }
 
+func addDocsWithLabels(engine *Engine) { // Indexes five labelled documents (IDs 1-5) into engine, then calls FlushIndex.
+	docId := uint64(1) // first document ID; incremented before each later document
+	engine.IndexDocument(docId, types.DocumentIndexData{
+		Content: "此次百度收购将成中国互联网最大并购",
+		Labels:  []string{"百度", "中国"}, // the only document here that carries two labels
+	}, false) // NOTE(review): the third argument is presumably forceUpdate — confirm against IndexDocument
+	docId++
+	engine.IndexDocument(docId, types.DocumentIndexData{
+		Content: "百度宣布拟全资收购91无线业务",
+		Labels:  []string{"百度"},
+	}, false)
+	docId++
+	engine.IndexDocument(docId, types.DocumentIndexData{
+		Content: "百度是中国最大的搜索引擎",
+		Labels:  []string{"百度"},
+	}, false)
+	docId++
+	engine.IndexDocument(docId, types.DocumentIndexData{
+		Content: "百度在研制无人汽车",
+		Labels:  []string{"百度"},
+	}, false)
+	docId++
+	engine.IndexDocument(docId, types.DocumentIndexData{
+		Content: "BAT是中国互联网三巨头", // this content does not contain 百度; only the label below does
+		Labels:  []string{"百度"},
+	}, false)
+	engine.FlushIndex() // presumably waits until the requests above are applied to the index — TODO confirm
+}
+
 type RankByTokenProximity struct {
 }
 
@@ -241,7 +269,7 @@ func TestRemoveDocument(t *testing.T) {
 
 	AddDocs(&engine)
 	engine.RemoveDocument(5, false)
-	engine.RemoveDocument(6, true)
+	engine.RemoveDocument(6, false)
 	engine.FlushIndex()
 	engine.IndexDocument(6, types.DocumentIndexData{
 		Content: "中国人口有十三亿",
@@ -280,7 +308,7 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 			{"人口", []int{18, 24}},
 		},
 		Fields: ScoringFields{1, 2, 3},
-	}, true)
+	}, false)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "",
@@ -289,13 +317,12 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 			{"人口", []int{6}},
 		},
 		Fields: ScoringFields{1, 2, 3},
-	}, true)
+	}, false)
 	docId++
 	engine.IndexDocument(docId, types.DocumentIndexData{
 		Content: "中国十三亿人口",
 		Fields:  ScoringFields{0, 9, 1},
-	}, true)
-
+	}, false)
 	engine.FlushIndex()
 
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口"})
@@ -317,6 +344,34 @@ func TestEngineIndexDocumentWithTokens(t *testing.T) {
 	utils.Expect(t, "[0 18]", outputs.Docs[2].TokenSnippetLocations)
 }
 
+func TestEngineIndexDocumentWithContentAndLabels(t *testing.T) { // Searches the labelled corpus for 百度 under two index types and expects one token and five documents from each.
+	var engine1, engine2 Engine // engine1 is initialised with LocationsIndex, engine2 with DocIdsIndex
+	engine1.Init(types.EngineInitOptions{
+		SegmenterDictionaries: "../data/dictionary.txt",
+		IndexerInitOptions: &types.IndexerInitOptions{
+			IndexType: types.LocationsIndex,
+		},
+	})
+	engine2.Init(types.EngineInitOptions{
+		SegmenterDictionaries: "../data/dictionary.txt",
+		IndexerInitOptions: &types.IndexerInitOptions{
+			IndexType: types.DocIdsIndex,
+		},
+	}) // NOTE(review): neither engine is closed anywhere in this test
+
+	addDocsWithLabels(&engine1) // both engines receive the same five documents
+	addDocsWithLabels(&engine2)
+
+	outputs1 := engine1.Search(types.SearchRequest{Text: "百度"})
+	outputs2 := engine2.Search(types.SearchRequest{Text: "百度"})
+	utils.Expect(t, "1", len(outputs1.Tokens)) // the query is expected to yield exactly one token
+	utils.Expect(t, "1", len(outputs2.Tokens))
+	utils.Expect(t, "百度", outputs1.Tokens[0]) // NOTE(review): indexes Tokens[0] unconditionally; would panic on empty Tokens unless Expect aborts the test — confirm
+	utils.Expect(t, "百度", outputs2.Tokens[0])
+	utils.Expect(t, "5", len(outputs1.Docs)) // all five documents carry the 百度 label, including the one whose content lacks the word
+	utils.Expect(t, "5", len(outputs2.Docs))
+}
+
 func TestEngineIndexDocumentWithPersistentStorage(t *testing.T) {
 	gob.Register(ScoringFields{})
 	var engine Engine
@@ -389,7 +444,7 @@ func TestCountDocsOnly(t *testing.T) {
 	})
 
 	AddDocs(&engine)
-	engine.RemoveDocument(5, true)
+	engine.RemoveDocument(5, false)
 	engine.FlushIndex()
 
 	outputs := engine.Search(types.SearchRequest{Text: "中国人口", CountDocsOnly: true})
@@ -435,61 +490,3 @@ func TestSearchWithin(t *testing.T) {
 	utils.Expect(t, "100", int(outputs.Docs[1].Scores[0]*1000))
 	utils.Expect(t, "[0 15]", outputs.Docs[1].TokenSnippetLocations)
 }
-
-func TestLookupWithLocations1(t *testing.T) {
-
-	type Data struct {
-		Id      int
-		Content string
-		Labels  []string
-	}
-
-	datas := make([]Data, 0)
-
-	data0 := Data{Id: 0, Content: "此次百度收购将成中国互联网最大并购", Labels: []string{"百度", "中国"}}
-	datas = append(datas, data0)
-
-	data1 := Data{Id: 1, Content: "百度宣布拟全资收购91无线业务", Labels: []string{"百度"}}
-	datas = append(datas, data1)
-
-	data2 := Data{Id: 2, Content: "百度是中国最大的搜索引擎", Labels: []string{"百度"}}
-	datas = append(datas, data2)
-
-	data3 := Data{Id: 3, Content: "百度在研制无人汽车", Labels: []string{"百度"}}
-	datas = append(datas, data3)
-
-	data4 := Data{Id: 4, Content: "BAT是中国互联网三巨头", Labels: []string{"百度"}}
-	datas = append(datas, data4)
-
-	// 初始化
-	searcher_locations := Engine{}
-	searcher_locations.Init(types.EngineInitOptions{
-		SegmenterDictionaries: "../data/dictionary.txt",
-		IndexerInitOptions: &types.IndexerInitOptions{
-			IndexType: types.LocationsIndex,
-		},
-	})
-	defer searcher_locations.Close()
-	for _, data := range datas {
-		searcher_locations.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels}, true)
-	}
-	searcher_locations.FlushIndex()
-	res_locations := searcher_locations.Search(types.SearchRequest{Text: "百度"})
-
-	searcher_docids := Engine{}
-	searcher_docids.Init(types.EngineInitOptions{
-		SegmenterDictionaries: "../data/dictionary.txt",
-		IndexerInitOptions: &types.IndexerInitOptions{
-			IndexType: types.DocIdsIndex,
-		},
-	})
-	defer searcher_docids.Close()
-	for _, data := range datas {
-		searcher_docids.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels}, true)
-	}
-	searcher_docids.FlushIndex()
-	res_docids := searcher_docids.Search(types.SearchRequest{Text: "百度"})
-	if res_docids.NumDocs != res_locations.NumDocs {
-		t.Errorf("期待的搜索结果个数=\"%d\", 实际=\"%d\"", res_docids.NumDocs, res_locations.NumDocs)
-	}
-}

+ 0 - 67
examples/enjoy_wukong.go

@@ -1,67 +0,0 @@
-package main
-
-import (
-	"log"
-
-	"github.com/huichen/wukong/engine"
-	"github.com/huichen/wukong/types"
-)
-
-var (
-	searcher = engine.Engine{}
-)
-
-type Data struct {
-	Id      int
-	Content string
-	Labels  []string
-}
-
-func (d *Data) Print() {
-	log.Println(d.Id, d.Content, d.Labels)
-}
-
-func main() {
-	datas := []Data{}
-
-	data0 := Data{Id: 1, Content: "此次百度收购将成中国互联网最大并购", Labels: []string{"百度", "中国"}}
-	datas = append(datas, data0)
-
-	data1 := Data{Id: 2, Content: "百度宣布拟全资收购91无线业务", Labels: []string{"百度"}}
-	datas = append(datas, data1)
-
-	data2 := Data{Id: 3, Content: "百度是中国最大的搜索引擎", Labels: []string{"百度"}}
-	datas = append(datas, data2)
-
-	data3 := Data{Id: 4, Content: "百度在研制无人汽车", Labels: []string{"百度"}}
-	datas = append(datas, data3)
-
-	data4 := Data{Id: 5, Content: "BAT是中国互联网三巨头", Labels: []string{"百度"}}
-	datas = append(datas, data4)
-
-	// 初始化
-	searcher.Init(types.EngineInitOptions{
-		SegmenterDictionaries: "../data/dictionary.txt",
-		IndexerInitOptions: &types.IndexerInitOptions{
-			IndexType: types.LocationsIndex,
-			//IndexType: types.FrequenciesIndex,
-			//IndexType: types.DocIdsIndex,
-		},
-	})
-	defer searcher.Close()
-
-	// 将文档加入索引
-	for _, data := range datas {
-		searcher.IndexDocument(uint64(data.Id), types.DocumentIndexData{Content: data.Content, Labels: data.Labels}, false)
-	}
-
-	// 等待索引刷新完毕
-	searcher.FlushIndex()
-
-	// 搜索输出格式见types.SearchResponse结构体
-	res := searcher.Search(types.SearchRequest{Text: "百度"})
-	log.Println("关键字", res.Tokens, "共有", res.NumDocs, "条搜索结果")
-	for i := range res.Docs {
-		datas[res.Docs[i].DocId-1].Print()
-	}
-}