stop_tokens.go 681 B

12345678910111213141516171819202122232425262728293031323334353637383940
  1. package engine
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. )
  // StopTokens is a set of stop tokens stored in a map keyed by token text.
  type StopTokens struct {
  	// stopTokens maps a token's text to a bool flag. The map is allocated
  	// by Init, which stores true for every token it loads.
  	stopTokens map[string]bool
  }
  10. // 从stopTokenFile中读入停用词,一个词一行
  11. // 文档索引建立时会跳过这些停用词
  12. func (st *StopTokens) Init(stopTokenFile string) {
  13. st.stopTokens = make(map[string]bool)
  14. if stopTokenFile == "" {
  15. return
  16. }
  17. file, err := os.Open(stopTokenFile)
  18. if err != nil {
  19. log.Fatal(err)
  20. }
  21. defer file.Close()
  22. scanner := bufio.NewScanner(file)
  23. for scanner.Scan() {
  24. text := scanner.Text()
  25. if text != "" {
  26. st.stopTokens[text] = true
  27. }
  28. }
  29. }
  30. func (st *StopTokens) IsStopToken(token string) bool {
  31. _, found := st.stopTokens[token]
  32. return found
  33. }