ranker_test.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. package core
  2. import (
  3. "github.com/huichen/wukong/types"
  4. "github.com/huichen/wukong/utils"
  5. "reflect"
  6. "testing"
  7. )
  8. type DummyScoringFields struct {
  9. label string
  10. counter int
  11. amount float32
  12. }
  13. type DummyScoringCriteria struct {
  14. Threshold float32
  15. }
  16. func (criteria DummyScoringCriteria) Score(
  17. doc types.IndexedDocument, fields interface{}) []float32 {
  18. if reflect.TypeOf(fields) == reflect.TypeOf(DummyScoringFields{}) {
  19. dsf := fields.(DummyScoringFields)
  20. value := float32(dsf.counter) + dsf.amount
  21. if value < criteria.Threshold {
  22. return []float32{}
  23. }
  24. return []float32{value}
  25. }
  26. return []float32{}
  27. }
  28. func TestRankDocument(t *testing.T) {
  29. var ranker Ranker
  30. ranker.Init()
  31. ranker.AddDoc(1, DummyScoringFields{})
  32. ranker.AddDoc(3, DummyScoringFields{})
  33. ranker.AddDoc(4, DummyScoringFields{})
  34. scoredDocs, _ := ranker.Rank([]types.IndexedDocument{
  35. types.IndexedDocument{DocId: 1, BM25: 6},
  36. types.IndexedDocument{DocId: 3, BM25: 24},
  37. types.IndexedDocument{DocId: 4, BM25: 18},
  38. }, types.RankOptions{ScoringCriteria: types.RankByBM25{}}, false)
  39. utils.Expect(t, "[3 [24000 ]] [4 [18000 ]] [1 [6000 ]] ", scoredDocsToString(scoredDocs))
  40. scoredDocs, _ = ranker.Rank([]types.IndexedDocument{
  41. types.IndexedDocument{DocId: 1, BM25: 6},
  42. types.IndexedDocument{DocId: 3, BM25: 24},
  43. types.IndexedDocument{DocId: 2, BM25: 0},
  44. types.IndexedDocument{DocId: 4, BM25: 18},
  45. }, types.RankOptions{ScoringCriteria: types.RankByBM25{}, ReverseOrder: true}, false)
  46. // doc0因为没有AddDoc所以没有添加进来
  47. utils.Expect(t, "[1 [6000 ]] [4 [18000 ]] [3 [24000 ]] ", scoredDocsToString(scoredDocs))
  48. }
  49. func TestRankWithCriteria(t *testing.T) {
  50. var ranker Ranker
  51. ranker.Init()
  52. ranker.AddDoc(1, DummyScoringFields{
  53. label: "label3",
  54. counter: 3,
  55. amount: 22.3,
  56. })
  57. ranker.AddDoc(2, DummyScoringFields{
  58. label: "label4",
  59. counter: 1,
  60. amount: 2,
  61. })
  62. ranker.AddDoc(3, DummyScoringFields{
  63. label: "label1",
  64. counter: 7,
  65. amount: 10.3,
  66. })
  67. ranker.AddDoc(4, DummyScoringFields{
  68. label: "label1",
  69. counter: -1,
  70. amount: 2.3,
  71. })
  72. criteria := DummyScoringCriteria{}
  73. scoredDocs, _ := ranker.Rank([]types.IndexedDocument{
  74. types.IndexedDocument{DocId: 1, TokenProximity: 6},
  75. types.IndexedDocument{DocId: 2, TokenProximity: -1},
  76. types.IndexedDocument{DocId: 3, TokenProximity: 24},
  77. types.IndexedDocument{DocId: 4, TokenProximity: 18},
  78. }, types.RankOptions{ScoringCriteria: criteria}, false)
  79. utils.Expect(t, "[1 [25300 ]] [3 [17300 ]] [2 [3000 ]] [4 [1300 ]] ", scoredDocsToString(scoredDocs))
  80. criteria.Threshold = 4
  81. scoredDocs, _ = ranker.Rank([]types.IndexedDocument{
  82. types.IndexedDocument{DocId: 1, TokenProximity: 6},
  83. types.IndexedDocument{DocId: 2, TokenProximity: -1},
  84. types.IndexedDocument{DocId: 3, TokenProximity: 24},
  85. types.IndexedDocument{DocId: 4, TokenProximity: 18},
  86. }, types.RankOptions{ScoringCriteria: criteria}, false)
  87. utils.Expect(t, "[1 [25300 ]] [3 [17300 ]] ", scoredDocsToString(scoredDocs))
  88. }
  89. func TestRemoveDoc(t *testing.T) {
  90. var ranker Ranker
  91. ranker.Init()
  92. ranker.AddDoc(1, DummyScoringFields{
  93. label: "label3",
  94. counter: 3,
  95. amount: 22.3,
  96. })
  97. ranker.AddDoc(2, DummyScoringFields{
  98. label: "label4",
  99. counter: 1,
  100. amount: 2,
  101. })
  102. ranker.AddDoc(3, DummyScoringFields{
  103. label: "label1",
  104. counter: 7,
  105. amount: 10.3,
  106. })
  107. ranker.RemoveDoc(3)
  108. criteria := DummyScoringCriteria{}
  109. scoredDocs, _ := ranker.Rank([]types.IndexedDocument{
  110. types.IndexedDocument{DocId: 1, TokenProximity: 6},
  111. types.IndexedDocument{DocId: 2, TokenProximity: -1},
  112. types.IndexedDocument{DocId: 3, TokenProximity: 24},
  113. types.IndexedDocument{DocId: 4, TokenProximity: 18},
  114. }, types.RankOptions{ScoringCriteria: criteria}, false)
  115. utils.Expect(t, "[1 [25300 ]] [2 [3000 ]] ", scoredDocsToString(scoredDocs))
  116. }