преди 10 години · f7a0c467d7
--- a/engine/engine.go
+++ b/engine/engine.go
@@ -37,7 +37,7 @@ type Engine struct {
 
				 	rankers    []core.Ranker
			
 
				 	segmenter  sego.Segmenter
			
 
				 	stopTokens StopTokens
			
 
				-	dbs []storage.Storage
			
 
				+	dbs        []storage.Storage
			
 
				 
			
 
				 	// 建立索引器使用的通信通道
			
 
				 	segmenterChannel               chan segmenterRequest
			
@@ -66,11 +66,13 @@ func (engine *Engine) Init(options types.EngineInitOptions) {
 
				 	engine.initOptions = options
			
 
				 	engine.initialized = true
			
 
				 
			
 
				-	// 载入分词器词典
			
 
				-	engine.segmenter.LoadDictionary(options.SegmenterDictionaries)
			
 
				+	if !options.NotUsingSegmenter {
			
 
				+		// 载入分词器词典
			
 
				+		engine.segmenter.LoadDictionary(options.SegmenterDictionaries)
			
 
				 
			
 
				-	// 初始化停用词
			
 
				-	engine.stopTokens.Init(options.StopTokenFile)
			
 
				+		// 初始化停用词
			
 
				+		engine.stopTokens.Init(options.StopTokenFile)
			
 
				+	}
			
 
				 
			
 
				 	// 初始化索引器和排序器
			
 
				 	for shard := 0; shard < options.NumShards; shard++ {
			
--- a/engine/segmenter_worker.go
+++ b/engine/segmenter_worker.go
@@ -17,7 +17,7 @@ func (engine *Engine) segmenterWorker() {
 
				 
			
 
				 		tokensMap := make(map[string][]int)
			
 
				 		numTokens := 0
			
 
				-		if request.data.Content != "" {
			
 
				+		if !engine.initOptions.NotUsingSegmenter && request.data.Content != "" {
			
 
				 			// 当文档正文不为空时，优先从内容分词中得到关键词
			
 
				 			segments := engine.segmenter.Segment([]byte(request.data.Content))
			
 
				 			for _, segment := range segments {
			
@@ -37,10 +37,12 @@ func (engine *Engine) segmenterWorker() {
 
				 			numTokens = len(request.data.Tokens)
			
 
				 		}
			
 
				 
			
 
				-		// 加入非分词的文档标签
			
 
				-		for _, label := range request.data.Labels {
			
 
				-			if !engine.stopTokens.IsStopToken(label) {
			
 
				-				tokensMap[label] = []int{}
			
 
				+		if !engine.initOptions.NotUsingSegmenter {
			
 
				+			// 加入非分词的文档标签
			
 
				+			for _, label := range request.data.Labels {
			
 
				+				if !engine.stopTokens.IsStopToken(label) {
			
 
				+					tokensMap[label] = []int{}
			
 
				+				}
			
 
				 			}
			
 
				 		}
			
 
				 
			
--- a/types/engine_init_options.go
+++ b/types/engine_init_options.go
@@ -28,6 +28,12 @@ var (
 
				 )
			
 
				 
			
 
				 type EngineInitOptions struct {
			
 
				+	// 是否使用分词器
			
 
				+	// 默认使用，否则在启动阶段跳过SegmenterDictionaries和StopTokenFile设置
			
 
				+	// 如果你不需要在引擎内分词，可以将这个选项设为true
			
 
				+	// 注意，如果你不用分词器，那么在调用IndexDocument时DocumentIndexData中的Content和Labels都会被忽略
			
 
				+	NotUsingSegmenter bool
			
 
				+
			
 
				 	// 半角逗号分隔的字典文件，具体用法见
			
 
				 	// sego.Segmenter.LoadDictionary函数的注释
			
 
				 	SegmenterDictionaries string
			
@@ -61,15 +67,17 @@ type EngineInitOptions struct {
 
				 	DefaultRankOptions *RankOptions
			
 
				 
			
 
				 	// 是否使用持久数据库，以及数据库文件保存的目录和裂分数目
			
 
				-	UsePersistentStorage bool
			
 
				+	UsePersistentStorage    bool
			
 
				 	PersistentStorageFolder string
			
 
				 	PersistentStorageShards int
			
 
				 }
			
 
				 
			
 
				 // 初始化EngineInitOptions，当用户未设定某个选项的值时用默认值取代
			
 
				 func (options *EngineInitOptions) Init() {
			
 
				-	if options.SegmenterDictionaries == "" {
			
 
				-		log.Fatal("字典文件不能为空")
			
 
				+	if !options.NotUsingSegmenter {
			
 
				+		if options.SegmenterDictionaries == "" {
			
 
				+			log.Fatal("字典文件不能为空")
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	if options.NumSegmenterThreads == 0 {