Hui Chen 12 роки тому
батько
коміт
602f3675e0

+ 1 - 1
docs/persistent_storage.md

@@ -21,7 +21,7 @@ type EngineInitOptions struct {
 或者词典有变化,这些变化会体现在启动后的引擎索引表中。
 2. 在调用engine.IndexDocument时,引擎将索引数据写入到PersistentStorageFolder指定
 的目录中。
-3. PersistentStorageShards定义了数据库裂分数目,默认为CPU数目。
+3. PersistentStorageShards定义了数据库裂分数目,默认为CPU数目。为了得到最好的性能,请调整这个参数使得每个裂分文件小于100MB。
 4. 在调用engine.RemoveDocument删除一个文档后,该文档会从持久存储中剔除,下次启动
 引擎时不会载入该文档。
 

+ 13 - 1
engine/engine.go

@@ -19,6 +19,7 @@ import (
 
 const (
 	// NumNanosecondsInAMillisecond is the number of nanoseconds in one millisecond.
 	NumNanosecondsInAMillisecond = 1000000
 	// PersistentStorageFilePrefix is the file-name prefix of each persistent-storage
 	// shard database; Init opens shard N at
 	// PersistentStorageFolder + "/" + PersistentStorageFilePrefix + "." + N.
+	PersistentStorageFilePrefix  = "wukong"
 )
 
 type Engine struct {
@@ -155,7 +156,7 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
 		// 打开或者创建数据库
 		engine.dbs = make([]*kv.DB, engine.initOptions.PersistentStorageShards)
 		for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
-			dbPath := engine.initOptions.PersistentStorageFolder + "/persist." + strconv.Itoa(shard) + "-of-" + strconv.Itoa(engine.initOptions.PersistentStorageShards)
+			dbPath := engine.initOptions.PersistentStorageFolder + "/" + PersistentStorageFilePrefix + "." + strconv.Itoa(shard)
 			db, err := utils.OpenOrCreateKv(dbPath, &kv.Options{})
 			if db == nil || err != nil {
 				log.Fatal("无法打开数据库", dbPath, ": ", err)
@@ -179,6 +180,17 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
 			}
 		}
 
+		// 关闭并重新打开数据库
+		for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
+			engine.dbs[shard].Close()
+			dbPath := engine.initOptions.PersistentStorageFolder + "/" + PersistentStorageFilePrefix + "." + strconv.Itoa(shard)
+			db, err := utils.OpenOrCreateKv(dbPath, &kv.Options{})
+			if db == nil || err != nil {
+				log.Fatal("无法打开数据库", dbPath, ": ", err)
+			}
+			engine.dbs[shard] = db
+		}
+
 		for shard := 0; shard < engine.initOptions.PersistentStorageShards; shard++ {
 			go engine.persistentStorageIndexDocumentWorker(shard)
 		}

+ 2 - 2
examples/benchmark.go

@@ -189,12 +189,12 @@ func main() {
 		t5 := time.Now()
 		t := t5.Sub(t4).Seconds() - tEndInit.Sub(tBeginInit).Seconds()
 		log.Print("从持久存储加入的索引总数", searcher1.NumTokenIndexAdded())
-		log.Printf("从持久存储建立索引花费时间 %v", t)
+		log.Printf("从持久存储建立索引花费时间 %v", t)
 		log.Printf("从持久存储建立索引速度每秒添加 %f 百万个索引",
 			float64(searcher1.NumTokenIndexAdded())/t/(1000000))
 
 	}
-	os.RemoveAll(*persistent_storage_folder)
+	//os.RemoveAll(*persistent_storage_folder)
 }
 
 func search(ch chan bool) {

+ 13 - 0
examples/codelab/search_server.go

@@ -3,6 +3,7 @@ package main
 
 import (
 	"bufio"
+	"encoding/gob"
 	"encoding/json"
 	"flag"
 	"github.com/huichen/wukong/engine"
@@ -11,6 +12,7 @@ import (
 	"log"
 	"net/http"
 	"os"
+	"os/signal"
 	"reflect"
 	"strconv"
 	"strings"
@@ -143,6 +145,7 @@ func main() {
 	flag.Parse()
 
 	// 初始化
+	gob.Register(WeiboScoringFields{})
 	searcher.Init(types.EngineInitOptions{
 		SegmenterDictionaries: "../../data/dictionary.txt",
 		StopTokenFile:         "../../data/stop_tokens.txt",
@@ -155,6 +158,16 @@ func main() {
 	// 索引
 	go indexWeibo()
 
+	// 捕获ctrl-c
+	c := make(chan os.Signal, 1)
+	signal.Notify(c, os.Interrupt)
+	go func(){
+		for _ = range c {
+			searcher.Close()
+			os.Exit(1)
+		}
+	}()
+
 	http.HandleFunc("/json", JsonRpcServer)
 	http.Handle("/", http.FileServer(http.Dir("static")))
 	log.Print("服务器启动")

+ 3 - 0
examples/custom_scoring_criteria.go

@@ -14,6 +14,7 @@ package main
 
 import (
 	"bufio"
+	"encoding/gob"
 	"flag"
 	"fmt"
 	"github.com/huichen/wukong/engine"
@@ -96,6 +97,7 @@ func main() {
 	log.Printf("待搜索的短语为\"%s\"", *query)
 
 	// 初始化
+	gob.Register(WeiboScoringFields{})
 	searcher.Init(types.EngineInitOptions{
 		SegmenterDictionaries: *dictionaries,
 		StopTokenFile:         *stop_token_file,
@@ -104,6 +106,7 @@ func main() {
 		},
 		DefaultRankOptions: &options,
 	})
+	defer searcher.Close()
 
 	// 读入微博数据
 	file, err := os.Open(*weibo_data)

+ 1 - 0
examples/simplest_example.go

@@ -21,6 +21,7 @@ func main() {
 	// 初始化
 	searcher.Init(types.EngineInitOptions{
 		SegmenterDictionaries: "../data/dictionary.txt"})
+	defer searcher.Close()
 
 	// 将文档加入索引
 	searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"})