| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- package types
- import (
- "log"
- "runtime"
- )
- var (
- // EngineInitOptions的默认值
- defaultNumSegmenterThreads = runtime.NumCPU()
- defaultNumShards = 2
- defaultIndexerBufferLength = runtime.NumCPU()
- defaultNumIndexerThreadsPerShard = runtime.NumCPU()
- defaultRankerBufferLength = runtime.NumCPU()
- defaultNumRankerThreadsPerShard = runtime.NumCPU()
- defaultDefaultRankOptions = RankOptions{
- ScoringCriteria: RankByBM25{},
- }
- defaultIndexerInitOptions = IndexerInitOptions{
- IndexType: FrequenciesIndex,
- BM25Parameters: &defaultBM25Parameters,
- }
- defaultBM25Parameters = BM25Parameters{
- K1: 2.0,
- B: 0.75,
- }
- )
- type EngineInitOptions struct {
- // 半角逗号分隔的字典文件,具体用法见
- // sego.Segmenter.LoadDictionary函数的注释
- SegmenterDictionaries string
- // 停用词文件
- StopTokenFile string
- // 分词器线程数
- NumSegmenterThreads int
- // 索引器和排序器的shard数目
- // 被检索/排序的文档会被均匀分配到各个shard中
- NumShards int
- // 索引器的信道缓冲长度
- IndexerBufferLength int
- // 索引器每个shard分配的线程数
- NumIndexerThreadsPerShard int
- // 排序器的信道缓冲长度
- RankerBufferLength int
- // 排序器每个shard分配的线程数
- NumRankerThreadsPerShard int
- // 索引器初始化选项
- IndexerInitOptions *IndexerInitOptions
- // 默认的搜索选项
- DefaultRankOptions *RankOptions
- }
- // 初始化EngineInitOptions,当用户未设定某个选项的值时用默认值取代
- func (options *EngineInitOptions) Init() {
- if options.SegmenterDictionaries == "" {
- log.Fatal("字典文件不能为空")
- }
- if options.NumSegmenterThreads == 0 {
- options.NumSegmenterThreads = defaultNumSegmenterThreads
- }
- if options.NumShards == 0 {
- options.NumShards = defaultNumShards
- }
- if options.IndexerBufferLength == 0 {
- options.IndexerBufferLength = defaultIndexerBufferLength
- }
- if options.NumIndexerThreadsPerShard == 0 {
- options.NumIndexerThreadsPerShard = defaultNumIndexerThreadsPerShard
- }
- if options.RankerBufferLength == 0 {
- options.RankerBufferLength = defaultRankerBufferLength
- }
- if options.NumRankerThreadsPerShard == 0 {
- options.NumRankerThreadsPerShard = defaultNumRankerThreadsPerShard
- }
- if options.IndexerInitOptions == nil {
- options.IndexerInitOptions = &defaultIndexerInitOptions
- }
- if options.IndexerInitOptions.BM25Parameters == nil {
- options.IndexerInitOptions.BM25Parameters = &defaultBM25Parameters
- }
- if options.DefaultRankOptions == nil {
- options.DefaultRankOptions = &defaultDefaultRankOptions
- }
- if options.DefaultRankOptions.ScoringCriteria == nil {
- options.DefaultRankOptions.ScoringCriteria = defaultDefaultRankOptions.ScoringCriteria
- }
- }
|