engine_init_options.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. package types
  2. import (
  3. "log"
  4. "runtime"
  5. )
  6. var (
  7. // EngineInitOptions的默认值
  8. defaultNumSegmenterThreads = runtime.NumCPU()
  9. defaultNumShards = 2
  10. defaultIndexerBufferLength = runtime.NumCPU()
  11. defaultNumIndexerThreadsPerShard = runtime.NumCPU()
  12. defaultRankerBufferLength = runtime.NumCPU()
  13. defaultNumRankerThreadsPerShard = runtime.NumCPU()
  14. defaultDefaultRankOptions = RankOptions{
  15. ScoringCriteria: RankByBM25{},
  16. }
  17. defaultIndexerInitOptions = IndexerInitOptions{
  18. IndexType: FrequenciesIndex,
  19. BM25Parameters: &defaultBM25Parameters,
  20. }
  21. defaultBM25Parameters = BM25Parameters{
  22. K1: 2.0,
  23. B: 0.75,
  24. }
  25. defaultPersistentStorageShards = 8
  26. )
  27. type EngineInitOptions struct {
  28. // 半角逗号分隔的字典文件,具体用法见
  29. // sego.Segmenter.LoadDictionary函数的注释
  30. SegmenterDictionaries string
  31. // 停用词文件
  32. StopTokenFile string
  33. // 分词器线程数
  34. NumSegmenterThreads int
  35. // 索引器和排序器的shard数目
  36. // 被检索/排序的文档会被均匀分配到各个shard中
  37. NumShards int
  38. // 索引器的信道缓冲长度
  39. IndexerBufferLength int
  40. // 索引器每个shard分配的线程数
  41. NumIndexerThreadsPerShard int
  42. // 排序器的信道缓冲长度
  43. RankerBufferLength int
  44. // 排序器每个shard分配的线程数
  45. NumRankerThreadsPerShard int
  46. // 索引器初始化选项
  47. IndexerInitOptions *IndexerInitOptions
  48. // 默认的搜索选项
  49. DefaultRankOptions *RankOptions
  50. // 是否使用持久数据库,以及数据库文件保存的目录和裂分数目
  51. UsePersistentStorage bool
  52. PersistentStorageFolder string
  53. PersistentStorageShards int
  54. }
  55. // 初始化EngineInitOptions,当用户未设定某个选项的值时用默认值取代
  56. func (options *EngineInitOptions) Init() {
  57. if options.SegmenterDictionaries == "" {
  58. log.Fatal("字典文件不能为空")
  59. }
  60. if options.NumSegmenterThreads == 0 {
  61. options.NumSegmenterThreads = defaultNumSegmenterThreads
  62. }
  63. if options.NumShards == 0 {
  64. options.NumShards = defaultNumShards
  65. }
  66. if options.IndexerBufferLength == 0 {
  67. options.IndexerBufferLength = defaultIndexerBufferLength
  68. }
  69. if options.NumIndexerThreadsPerShard == 0 {
  70. options.NumIndexerThreadsPerShard = defaultNumIndexerThreadsPerShard
  71. }
  72. if options.RankerBufferLength == 0 {
  73. options.RankerBufferLength = defaultRankerBufferLength
  74. }
  75. if options.NumRankerThreadsPerShard == 0 {
  76. options.NumRankerThreadsPerShard = defaultNumRankerThreadsPerShard
  77. }
  78. if options.IndexerInitOptions == nil {
  79. options.IndexerInitOptions = &defaultIndexerInitOptions
  80. }
  81. if options.IndexerInitOptions.BM25Parameters == nil {
  82. options.IndexerInitOptions.BM25Parameters = &defaultBM25Parameters
  83. }
  84. if options.DefaultRankOptions == nil {
  85. options.DefaultRankOptions = &defaultDefaultRankOptions
  86. }
  87. if options.DefaultRankOptions.ScoringCriteria == nil {
  88. options.DefaultRankOptions.ScoringCriteria = defaultDefaultRankOptions.ScoringCriteria
  89. }
  90. if options.PersistentStorageShards == 0 {
  91. options.PersistentStorageShards = defaultPersistentStorageShards
  92. }
  93. }