| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- package core
- import (
- "github.com/huichen/wukong/types"
- "github.com/huichen/wukong/utils"
- "log"
- "sort"
- "sync"
- )
// Ranker keeps the per-document scoring fields and uses them to score and
// order documents. Init must be called once before any other method.
type Ranker struct {
	// lock bundles the read/write mutex together with the maps it guards.
	lock struct {
		sync.RWMutex

		// fields holds, per document ID, the scoring fields handed to AddDoc.
		fields map[uint64]interface{}

		// docs records which document IDs are currently registered.
		docs map[uint64]bool
	}

	// initialized is set by Init; the other methods refuse to run without it.
	initialized bool
}
- func (ranker *Ranker) Init() {
- if ranker.initialized == true {
- log.Fatal("排序器不能初始化两次")
- }
- ranker.initialized = true
- ranker.lock.fields = make(map[uint64]interface{})
- ranker.lock.docs = make(map[uint64]bool)
- }
- // 给某个文档添加评分字段
- func (ranker *Ranker) AddDoc(docId uint64, fields interface{}) {
- if ranker.initialized == false {
- log.Fatal("排序器尚未初始化")
- }
- ranker.lock.Lock()
- ranker.lock.fields[docId] = fields
- ranker.lock.docs[docId] = true
- ranker.lock.Unlock()
- }
- // 删除某个文档的评分字段
- func (ranker *Ranker) RemoveDoc(docId uint64) {
- if ranker.initialized == false {
- log.Fatal("排序器尚未初始化")
- }
- ranker.lock.Lock()
- delete(ranker.lock.fields, docId)
- delete(ranker.lock.docs, docId)
- ranker.lock.Unlock()
- }
- // 给文档评分并排序
- func (ranker *Ranker) Rank(
- docs []types.IndexedDocument, options types.RankOptions, countDocsOnly bool) (types.ScoredDocuments, int) {
- if ranker.initialized == false {
- log.Fatal("排序器尚未初始化")
- }
- // 对每个文档评分
- var outputDocs types.ScoredDocuments
- numDocs := 0
- for _, d := range docs {
- ranker.lock.RLock()
- // 判断doc是否存在
- if _, ok := ranker.lock.docs[d.DocId]; ok {
- fs := ranker.lock.fields[d.DocId]
- ranker.lock.RUnlock()
- // 计算评分并剔除没有分值的文档
- scores := options.ScoringCriteria.Score(d, fs)
- if len(scores) > 0 {
- if !countDocsOnly {
- outputDocs = append(outputDocs, types.ScoredDocument{
- DocId: d.DocId,
- Scores: scores,
- TokenSnippetLocations: d.TokenSnippetLocations,
- TokenLocations: d.TokenLocations})
- }
- numDocs++
- }
- } else {
- ranker.lock.RUnlock()
- }
- }
- // 排序
- if !countDocsOnly {
- if options.ReverseOrder {
- sort.Sort(sort.Reverse(outputDocs))
- } else {
- sort.Sort(outputDocs)
- }
- // 当用户要求只返回部分结果时返回部分结果
- var start, end int
- if options.MaxOutputs != 0 {
- start = utils.MinInt(options.OutputOffset, len(outputDocs))
- end = utils.MinInt(options.OutputOffset+options.MaxOutputs, len(outputDocs))
- } else {
- start = utils.MinInt(options.OutputOffset, len(outputDocs))
- end = len(outputDocs)
- }
- return outputDocs[start:end], numDocs
- }
- return outputDocs, numDocs
- }
|