抓取历年天气

前端之家收集整理的这篇文章主要介绍了抓取历年天气,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

这是一个使用 goquery 抓取历年天气的 demo。数据量比较大,目前按省份分别存储天气数据,每个省份保存为一个 csv 文件。


  1. package main
  2.  
  3. import (
  4. "code.google.com/p/mahonia"
  5. "encoding/csv"
  6. "fmt"
  7. "github.com/PuerkitoBio/goquery"
  8. "net/http"
  9. "os"
  10. "strings"
  11. "time"
  12. )
  13.  
  14. var log = loger.Loger{
  15. Level: loger.DEBUG,}
  16.  
  // YEAR is the calendar year whose weather history is downloaded. It is also
  // used as the prefix of the generated CSV file names.
  const YEAR = 2013

  // SleepTime is the pause between two monthly page requests, expressed in
  // milliseconds.
  const SleepTime = 100
  21.  
  22. func main() {
  23. sc,cc := GetCity()
  24. var weatherInfoAll []*WeaterInfo
  25. for key,value := range sc {
  26. filePath := fmt.Sprintf("%d%s.csv",YEAR,key)
  27. _,err := os.Stat(filePath)
  28. if err == nil {
  29. continue
  30. }
  31. weatherInfoAll = make([]*WeaterInfo,100000)
  32. for _,city := range value {
  33. name := cc[city]
  34. log.Debug("get ",key,city)
  35. client := &http.Client{}
  36. weatherInfoYear := GetWeather(client,city,name)
  37. weatherInfoAll = append(weatherInfoAll,weatherInfoYear...)
  38. }
  39. SaveToCSV(key,weatherInfoAll)
  40. }
  41. }
  42.  
  43. //返回数据为省份=>城市名  城市名=>拼音.html
  44. func GetCity() (sc map[string][]string,cc map[string]string) {
  45. url := "http://www.tianqihoubao.com/lishi/"
  46. request,err := http.NewRequest("GET",url,nil)
  47. if err != nil {
  48. log.Log(err)
  49. return
  50. }
  51. request.Header.Add("User-Agent","Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML,like Gecko) Ubuntu Chromium/39.0.2171.65 Chrome/39.0.2171.65 Safari/537.36")
  52. request.Header.Add("referer","http://www.tianqihoubao.com/")
  53. resp,err := http.DefaultClient.Do(request)
  54. if err != nil {
  55. log.Log(err)
  56. return
  57. }
  58. document,err := goquery.NewDocumentFromResponse(resp)
  59. if err != nil {
  60. log.Log(err)
  61. return
  62. }
  63. gbk := mahonia.NewDecoder("gbk")
  64. sc = make(map[string][]string)
  65. cc = make(map[string]string)
  66. document.Find(".citychk").Find("dl").Each(func(index int,s *goquery.Selection) {
  67. province := gbk.ConvertString(s.Find("dt").Find("b").Text())
  68. citys := make([]string,20)
  69. s.Find("dd").Find("a").Each(func(index int,se *goquery.Selection) {
  70. uri,exists := se.Attr("href")
  71. if !exists {
  72. return
  73. }
  74. name := gbk.ConvertString(se.Text())
  75. uri = strings.Replace(uri,".html","",-1)
  76. citys = append(citys,name)
  77. cc[name] = uri
  78. })
  79. sc[province] = citys
  80. })
  81. return
  82. }
  83.  
  // WeaterInfo is one scraped weather record, i.e. one table row of a monthly
  // history page. All fields hold text; nothing is parsed into numbers or
  // dates.
  type WeaterInfo struct {
  	// Province and City label where the record belongs.
  	Province, City string
  	// Date is the link text of the row's first cell.
  	Date string
  	// Info, Temp and Wind are the texts of the second, third and fourth
  	// cells: weather condition, temperature, and wind force/direction.
  	Info, Temp, Wind string
  }
  92.  
  93. func GetWeather(client *http.Client,province,name string) []*WeaterInfo {
  94. baseUrl := fmt.Sprintf("http://www.tianqihoubao.com%s/month/%%s",name)
  95. weaterInfoYear := make([]*WeaterInfo,380)
  96. for i := 1; i <= 12; i++ {
  97. url := fmt.Sprintf(baseUrl,fmt.Sprintf("%d%02d.html",i))
  98. weaterInfos := GetWeatherInfo(client,url)
  99. weaterInfoYear = append(weaterInfoYear,weaterInfos...)
  100. time.Sleep(time.Millisecond * SleepTime)
  101. }
  102. return weaterInfoYear
  103. }
  104.  
  105. func GetWeatherInfo(client *http.Client,url string) (weaterInfos []*WeaterInfo) {
  106. request,err := client.Do(request)
  107. if err != nil {
  108. log.Log(err)
  109. return
  110. }
  111. document,err := goquery.NewDocumentFromResponse(resp)
  112. if err != nil {
  113. log.Log(err)
  114. return
  115. }
  116. gbk := mahonia.NewDecoder("gbk")
  117. weaterInfos = make([]*WeaterInfo,31)
  118. document.Find("#content").Find("tbody").Find("tr").Each(func(index int,s *goquery.Selection) {
  119. //排除第一个
  120. if index == 0 {
  121. return
  122. }
  123. var date,info,temp,wind string
  124. s.Find("td").Each(func(index int,se *goquery.Selection) {
  125. if index == 0 {
  126. date = gbk.ConvertString(se.Find("a").Text())
  127. }
  128. if index == 1 {
  129. info = gbk.ConvertString(se.Text())
  130. }
  131. if index == 2 {
  132. temp = gbk.ConvertString(se.Text())
  133. }
  134. if index == 3 {
  135. wind = gbk.ConvertString(se.Text())
  136. }
  137. })
  138. weatherInfo := &WeaterInfo{
  139. Province: province,City: city,Date: date,Info: info,Temp: temp,Wind: wind,}
  140. weaterInfos = append(weaterInfos,weatherInfo)
  141. })
  142. return
  143. }
  144.  
  145. func SaveToCSV(file string,weatherInfos []*WeaterInfo) (err error) {
  146. filePath := fmt.Sprintf("%d%s.csv",file)
  147. _,err = os.Stat(filePath)
  148. if err == nil {
  149. return
  150. }
  151. f,err := os.Create(filePath)
  152. if err != nil {
  153. log.Log(err)
  154. return
  155. }
  156. defer f.Close()
  157. f.WriteString("\xEF\xBB\xBF")  //UTF-8
  158. w := csv.NewWriter(f)
  159. w.Write([]string{"省份","城市","日期","天气状况","气温","风力风向"})
  160. for i,weatherInfo := range weatherInfos {
  161. if i%1000 == 0 {
  162. w.Flush() //刷入文件
  163. }
  164. strs := []string{TrimSpace(weatherInfo.Province),TrimSpace(weatherInfo.City),TrimSpace(weatherInfo.Date),TrimSpace(weatherInfo.Info),TrimSpace(weatherInfo.Temp),TrimSpace(weatherInfo.Wind)}
  165. w.Write(strs)
  166. }
  167. w.Flush()
  168. return
  169. }
  170.  
  // whitespaceStripper deletes line breaks, spaces and tabs in a single pass.
  var whitespaceStripper = strings.NewReplacer("\n", "", "\r", "", " ", "", "\t", "")

  // TrimSpace returns value with every newline, carriage return, space and
  // tab removed. Unlike strings.TrimSpace it also removes these characters
  // from the middle of the string, not only from its ends.
  func TrimSpace(value string) string {
  	return whitespaceStripper.Replace(value)
  }

日志库删掉了,因为看起来不是很好。当然,也不是说这份代码就写得好,只是临时写的东西。

猜你在找的Go相关文章