engine_init_options.go 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. package types
  2. import (
  3. "log"
  4. "runtime"
  5. )
  6. var (
  7. // EngineInitOptions的默认值
  8. defaultNumSegmenterThreads = runtime.NumCPU()
  9. defaultNumShards = 2
  10. defaultIndexerBufferLength = runtime.NumCPU()
  11. defaultNumIndexerThreadsPerShard = runtime.NumCPU()
  12. defaultRankerBufferLength = runtime.NumCPU()
  13. defaultNumRankerThreadsPerShard = runtime.NumCPU()
  14. defaultDefaultRankOptions = RankOptions{
  15. ScoringCriteria: RankByBM25{},
  16. }
  17. defaultIndexerInitOptions = IndexerInitOptions{
  18. IndexType: FrequenciesIndex,
  19. BM25Parameters: &defaultBM25Parameters,
  20. }
  21. defaultBM25Parameters = BM25Parameters{
  22. K1: 2.0,
  23. B: 0.75,
  24. }
  25. defaultPersistentStorageShards = 8
  26. )
  27. type EngineInitOptions struct {
  28. // 是否使用分词器
  29. // 默认使用,否则在启动阶段跳过SegmenterDictionaries和StopTokenFile设置
  30. // 如果你不需要在引擎内分词,可以将这个选项设为true
  31. // 注意,如果你不用分词器,那么在调用IndexDocument时DocumentIndexData中的Content会被忽略
  32. NotUsingSegmenter bool
  33. // 半角逗号分隔的字典文件,具体用法见
  34. // sego.Segmenter.LoadDictionary函数的注释
  35. SegmenterDictionaries string
  36. // 停用词文件
  37. StopTokenFile string
  38. // 分词器线程数
  39. NumSegmenterThreads int
  40. // 索引器和排序器的shard数目
  41. // 被检索/排序的文档会被均匀分配到各个shard中
  42. NumShards int
  43. // 索引器的信道缓冲长度
  44. IndexerBufferLength int
  45. // 索引器每个shard分配的线程数
  46. NumIndexerThreadsPerShard int
  47. // 排序器的信道缓冲长度
  48. RankerBufferLength int
  49. // 排序器每个shard分配的线程数
  50. NumRankerThreadsPerShard int
  51. // 索引器初始化选项
  52. IndexerInitOptions *IndexerInitOptions
  53. // 默认的搜索选项
  54. DefaultRankOptions *RankOptions
  55. // 是否使用持久数据库,以及数据库文件保存的目录和裂分数目
  56. UsePersistentStorage bool
  57. PersistentStorageFolder string
  58. PersistentStorageShards int
  59. }
  60. // 初始化EngineInitOptions,当用户未设定某个选项的值时用默认值取代
  61. func (options *EngineInitOptions) Init() {
  62. if !options.NotUsingSegmenter {
  63. if options.SegmenterDictionaries == "" {
  64. log.Fatal("字典文件不能为空")
  65. }
  66. }
  67. if options.NumSegmenterThreads == 0 {
  68. options.NumSegmenterThreads = defaultNumSegmenterThreads
  69. }
  70. if options.NumShards == 0 {
  71. options.NumShards = defaultNumShards
  72. }
  73. if options.IndexerBufferLength == 0 {
  74. options.IndexerBufferLength = defaultIndexerBufferLength
  75. }
  76. if options.NumIndexerThreadsPerShard == 0 {
  77. options.NumIndexerThreadsPerShard = defaultNumIndexerThreadsPerShard
  78. }
  79. if options.RankerBufferLength == 0 {
  80. options.RankerBufferLength = defaultRankerBufferLength
  81. }
  82. if options.NumRankerThreadsPerShard == 0 {
  83. options.NumRankerThreadsPerShard = defaultNumRankerThreadsPerShard
  84. }
  85. if options.IndexerInitOptions == nil {
  86. options.IndexerInitOptions = &defaultIndexerInitOptions
  87. }
  88. if options.IndexerInitOptions.BM25Parameters == nil {
  89. options.IndexerInitOptions.BM25Parameters = &defaultBM25Parameters
  90. }
  91. if options.DefaultRankOptions == nil {
  92. options.DefaultRankOptions = &defaultDefaultRankOptions
  93. }
  94. if options.DefaultRankOptions.ScoringCriteria == nil {
  95. options.DefaultRankOptions.ScoringCriteria = defaultDefaultRankOptions.ScoringCriteria
  96. }
  97. if options.PersistentStorageShards == 0 {
  98. options.PersistentStorageShards = defaultPersistentStorageShards
  99. }
  100. }