crawl_weibo_data.go 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. package main
  2. import (
  3. "flag"
  4. "fmt"
  5. "github.com/huichen/gobo"
  6. "github.com/huichen/gobo/contrib"
  7. "io/ioutil"
  8. "log"
  9. "os"
  10. "strings"
  11. "time"
  12. )
  13. var (
  14. access_token = flag.String("access_token", "", "用户的访问令牌")
  15. weibo = gobo.Weibo{}
  16. users_file = flag.String("users_file", "users.txt", "从该文件读入要下载的微博用户名,每个名字一行")
  17. output_file = flag.String("output_file", "weibo_data.txt", "将抓取的微博写入下面的文件")
  18. num_weibos = flag.Int("num_weibos", 2000, "从每个微博账号中抓取多少条微博")
  19. )
  20. func main() {
  21. flag.Parse()
  22. // 读取用户名
  23. content, err := ioutil.ReadFile(*users_file)
  24. if err != nil {
  25. log.Fatal("无法读取-users_file")
  26. }
  27. users := strings.Split(string(content), "\n")
  28. outputFile, _ := os.Create(*output_file)
  29. defer outputFile.Close()
  30. // 抓微博
  31. for _, user := range users {
  32. if user == "" {
  33. continue
  34. }
  35. log.Printf("抓取 @%s 的微博", user)
  36. statuses, err := contrib.GetStatuses(
  37. &weibo, *access_token, user, 0, *num_weibos, 5000) // 超时5秒
  38. if err != nil {
  39. log.Print(err)
  40. continue
  41. }
  42. for _, status := range statuses {
  43. t, _ := time.Parse("Mon Jan 2 15:04:05 -0700 2006", status.Created_At)
  44. outputFile.WriteString(fmt.Sprintf(
  45. "%d||||%d||||%d||||%s||||%d||||%d||||%d||||%s||||%s||||%s\n",
  46. status.Id, uint32(t.Unix()), status.User.Id, status.User.Screen_Name,
  47. status.Reposts_Count, status.Comments_Count, status.Attitudes_Count,
  48. status.Thumbnail_Pic, status.Original_Pic, status.Text))
  49. }
  50. }
  51. }