Twitter 的 API 有很多,但总感觉不太给力,总是抠抠搜搜的,不肯一次把数据给全。我总结了一下,自己封装了几个函数,
代码如下:
'''
Created on Jun 22, 2013

@author: Yang

Helpers that collect as many tweets as possible from the Twitter API by
paging with ``max_id`` / ``since_id``.

Every fetching function takes the API client as its ``t`` argument and calls
``GetSearch`` / ``GetUserTimeline`` on it (presumably a python-twitter
``twitter.Api`` instance, as in the example below).
'''
import datetime
import json
import math
import time
from email.utils import mktime_tz, parsedate, parsedate_tz

try:
    # Nothing in this module references the package directly; callers only
    # need it to build the client they pass in as ``t``. A missing install
    # therefore should not make the pure helpers unimportable.
    import twitter
except ImportError:
    twitter = None

# Example client setup. Keep real credentials out of source files.
#
# Consumerkey = '<consumer key>'
# Consumersecret = '<consumer secret>'
# token = '<access token>'
# tokensecret = '<access token secret>'
#
# t = twitter.Api(Consumerkey, Consumersecret, token, tokensecret)

# Page size requested from the search endpoint.
_SEARCH_PAGE_SIZE = 200
# Number of follow-up search pages requested after the first one.
_EXTRA_SEARCH_PAGES = 5
# Page size requested from the user-timeline endpoint.
_TIMELINE_PAGE_SIZE = 100


def timestamp(str):
    """Convert an RFC 2822 style date string into a POSIX timestamp.

    The UTC offset in the string (e.g. ``+0000``) is honoured. A string
    without an offset is interpreted as local time.

    The parameter keeps its original name for backward compatibility even
    though it shadows the builtin.

    Returns:
        Seconds since the epoch, as a float.

    Raises:
        ValueError: if the string cannot be parsed as a date.
    """
    parsed = parsedate_tz(str)
    if parsed is None:
        raise ValueError('unparseable date string: %r' % (str,))
    return float(mktime_tz(parsed))


def _status_to_dict(status):
    """Decode one API status object into a plain dict via its JSON string form."""
    # Relies on str(status) being a JSON document, as the original code did.
    return json.loads(str(status))


def _paged_search(api, **criteria):
    """Run ``api.GetSearch`` repeatedly, walking backwards through the results.

    ``criteria`` is forwarded unchanged to every ``GetSearch`` call. After the
    first page, each request asks only for tweets older than the oldest one
    seen so far. Paging stops early once a page comes back empty.
    """
    collected = []
    max_id = None
    for _ in range(_EXTRA_SEARCH_PAGES + 1):
        kwargs = dict(criteria, count=_SEARCH_PAGE_SIZE)
        if max_id is not None:
            kwargs['max_id'] = max_id
        page = [_status_to_dict(status) for status in api.GetSearch(**kwargs)]
        if not page:
            break
        collected.extend(page)
        # max_id is inclusive, so step one below the oldest id to avoid
        # fetching the boundary tweet again on the next page.
        max_id = min(tweet['id'] for tweet in page) - 1
    return collected


def SearchQuery(query, t):
    """Search tweets matching ``query`` and return them as a list of dicts.

    Args:
        query: search term passed to ``t.GetSearch(term=...)``.
        t: API client exposing ``GetSearch``.
    """
    return _paged_search(t, term=query)


def SearchLocation(geo, t):
    """Search tweets by location and return them as a list of dicts.

    Args:
        geo: value passed to ``t.GetSearch(geocode=...)``.
        t: API client exposing ``GetSearch``.
    """
    return _paged_search(t, geocode=geo)


def GetUsertweets(id, tweetid, tweettime, delay=24*60*60, t=None):
    """Return a user's tweets posted within ``delay`` seconds of a given tweet.

    Two timeline pages are fetched: one up to and including ``tweetid``
    (``max_id``) and one after it (``since_id``). Only tweets whose
    ``created_at`` lies within ``delay`` seconds of ``tweettime`` are kept.

    Args:
        id: user identifier, passed positionally to ``t.GetUserTimeline``.
        tweetid: id of the reference tweet.
        tweettime: creation date string of the reference tweet.
        delay: half-width of the time window in seconds (default: one day).
        t: API client exposing ``GetUserTimeline``. Required; it only has a
            default because it follows ``delay`` in the signature.

    Raises:
        TypeError: if ``t`` is not supplied.
        ValueError: if a date string cannot be parsed.
    """
    if t is None:
        raise TypeError("GetUsertweets() requires the API client argument 't'")

    anchor = timestamp(tweettime)
    nearby = []
    # First the tweets up to the reference tweet, then the ones after it.
    for window in ({'max_id': tweetid}, {'since_id': tweetid}):
        statuses = t.GetUserTimeline(id, count=_TIMELINE_PAGE_SIZE, **window)
        for status in statuses:
            tweet = _status_to_dict(status)
            created = timestamp('%s' % (tweet['created_at'],))
            if abs(created - anchor) <= delay:
                nearby.append(tweet)
    return nearby
这段代码里主要有两类函数:搜索函数(SearchQuery、SearchLocation)和 GetUsertweets。
里面反复用到了 max_id 和 since_id 来翻页,这样就能尽可能多地抓取 tweets。