# Scrape URL (example for 2009-01-01): http://www.wunderground.com/history/airport/KBUF/2009/1/1/DailyHistory.html?req_city=NA&req_state=NA&req_statename=NA
#-*-coding:utf-8-*-
#!/usr/bin/python
import calendar
import urllib2

from BeautifulSoup import BeautifulSoup
# Scrape one temperature reading per day of YEAR for station KBUF from
# Weather Underground and store them in OUTPUT_PATH, one
# "YYYYMMDD,<reading>" line per day.
YEAR = 2009
OUTPUT_PATH = 'wunder-data.txt'
# Month and day are not zero-padded in the URL path.
URL_TEMPLATE = ("http://www.wunderground.com/history/airport/KBUF/"
                "%d/%d/%d/DailyHistory.html")

# `with` guarantees the output file is flushed and closed even if a request
# or the HTML parsing fails part-way through the year.
with open(OUTPUT_PATH, 'w') as f:
    # Walk every calendar day of the year: all 12 months, starting at day 1.
    for m in range(1, 13):
        # monthrange() returns (weekday of the 1st, number of days in month).
        days_in_month = calendar.monthrange(YEAR, m)[1]
        for d in range(1, days_in_month + 1):
            # Zero-padded YYYYMMDD stamp, used for both the progress message
            # and the output row so the two always agree and are unambiguous.
            timestamp = '%d%02d%02d' % (YEAR, m, d)
            print("Getting data for " + timestamp)

            # Fetch the daily-history page for this date.
            page = urllib2.urlopen(URL_TEMPLATE % (YEAR, m, d))
            try:
                soup = BeautifulSoup(page)
            finally:
                # Release the HTTP connection as soon as the page is parsed.
                page.close()

            # The reading is the text of the <span> inside the fourth element
            # carrying class "nobr"; this index depends on the page layout.
            # dayTemp = soup.body.nobr.b.string  (older page layout)
            dayTemp = soup.findAll(attrs={"class": "nobr"})[3].span.string

            f.write(timestamp + ',' + dayTemp + '\n')