现在的位置: 首页 > 综合 > 正文

提供一个Twitter的API(Python)

2013年09月16日 ⁄ 综合 ⁄ 共 2279字 ⁄ 字号 评论关闭

Twitter 的API有很多,但是总感觉不给力,总是抠抠搜搜的,不肯把数据给全。我总结了一下,写了几个API。

如下code:

'''
Created on Jun 22, 2013

@author: Yang
'''

import twitter
import json
import time
import datetime
from email.utils import parsedate
import math

# Example setup -- substitute your own application credentials and never
# publish real keys or tokens:
# Consumerkey = 'YOUR_CONSUMER_KEY'
# Consumersecret = 'YOUR_CONSUMER_SECRET'
# token = 'YOUR_ACCESS_TOKEN'
# tokensecret = 'YOUR_ACCESS_TOKEN_SECRET'
# 
# t = twitter.Api(Consumerkey, Consumersecret, token, tokensecret)

def timestamp(str):
    """Convert an RFC 2822-style date string into a POSIX timestamp.

    The string is parsed with ``email.utils.parsedate_tz`` so that an
    explicit UTC offset in the input (for example the ``+0000`` in a
    ``created_at`` value such as ``"Sat Jun 22 12:00:00 +0000 2013"``) is
    honoured.  The previous implementation discarded the offset and
    interpreted the fields as local time, which shifted every result by the
    host machine's UTC offset.

    Args:
        str: The date string to convert.  (The name shadows the builtin but
            is kept so existing keyword callers continue to work.)

    Returns:
        Seconds since the epoch as a float.  When the input carries no
        timezone information it is interpreted as local time, exactly as
        before.

    Raises:
        TypeError: If the string cannot be parsed as a date.  The original
            code failed with a TypeError in this case too, so the exception
            type is preserved; only the message is made explicit.
    """
    # Imported locally because the module-level import list only brings in
    # ``parsedate``; this keeps the function self-contained.
    from email.utils import mktime_tz, parsedate_tz

    parsed = parsedate_tz(str)
    if parsed is None:
        raise TypeError("timestamp() could not parse date string: %r" % (str,))
    # mktime_tz applies the parsed UTC offset (falling back to local time
    # when there is none); float() keeps the original return type.
    return float(mktime_tz(parsed))

def SearchQuery(query, t):
    """Search tweets matching *query*, paging backwards through the results.

    One initial search is issued, followed by up to five further searches
    that each ask for tweets older than everything collected so far.

    Fixes relative to the original listing:
      * the API handle ``t`` is no longer overwritten by the result list
        (the follow-up ``t.GetSearch`` calls used to hit a plain list);
      * follow-up requests pass ``term=`` (the original passed ``temp=``);
      * an empty page stops the paging instead of raising ``NameError`` or
        repeating the same request;
      * ``max_id`` is treated as inclusive, so the next page starts at
        ``lowest id - 1`` to avoid fetching the boundary tweet twice.

    Args:
        query: The search term forwarded as ``term`` to ``t.GetSearch``.
        t: An API object exposing ``GetSearch``; each returned item must
            stringify to a JSON document containing an ``'id'`` key.

    Returns:
        A list of dicts, one per tweet, in the order they were received.
    """
    collected = []
    next_max_id = None
    # 1 initial request + 5 follow-up pages, matching the original limits.
    for _ in range(6):
        if next_max_id is None:
            page = t.GetSearch(term=query, count=200)
        else:
            page = t.GetSearch(term=query, max_id=next_max_id, count=200)
        if not page:
            # Nothing older is available; further requests would be wasted.
            break
        parsed = [json.loads(str(status)) for status in page]
        collected.extend(parsed)
        # Use the smallest id seen rather than relying on result ordering.
        next_max_id = min(tweet['id'] for tweet in parsed) - 1
    return collected

def SearchLocation(geo, t):
    """Search tweets by location, paging backwards through the results.

    One initial search is issued, followed by up to five further searches
    that each ask for tweets older than everything collected so far.

    Fixes relative to the original listing:
      * the API handle ``t`` is no longer overwritten by the result list
        (the follow-up ``t.GetSearch`` calls used to hit a plain list);
      * an empty page stops the paging instead of raising ``NameError`` or
        repeating the same request;
      * ``max_id`` is treated as inclusive, so the next page starts at
        ``lowest id - 1`` to avoid fetching the boundary tweet twice.

    Args:
        geo: The location filter, forwarded unchanged as ``geocode`` to
            ``t.GetSearch``.
        t: An API object exposing ``GetSearch``; each returned item must
            stringify to a JSON document containing an ``'id'`` key.

    Returns:
        A list of dicts, one per tweet, in the order they were received.
    """
    collected = []
    next_max_id = None
    # 1 initial request + 5 follow-up pages, matching the original limits.
    for _ in range(6):
        if next_max_id is None:
            page = t.GetSearch(geocode=geo, count=200)
        else:
            page = t.GetSearch(geocode=geo, max_id=next_max_id, count=200)
        if not page:
            # Nothing older is available; further requests would be wasted.
            break
        parsed = [json.loads(str(status)) for status in page]
        collected.extend(parsed)
        # Use the smallest id seen rather than relying on result ordering.
        next_max_id = min(tweet['id'] for tweet in parsed) - 1
    return collected

def GetUsertweets(id, tweetid, tweettime, delay=24*60*60, t=None):
    """Collect a user's tweets posted within *delay* seconds of a reference tweet.

    The user's timeline is queried twice: once with ``max_id=tweetid`` and
    once with ``since_id=tweetid``.  From both pages only the tweets whose
    ``created_at`` lies within ``delay`` seconds of ``tweettime`` are kept.

    Fixes relative to the original listing:
      * the original signature placed the required ``t`` after the
        defaulted ``delay``, which is a SyntaxError; ``t`` now defaults to
        ``None`` (argument order unchanged) and is validated explicitly;
      * the API handle ``t`` is no longer overwritten by the result list,
        so the second ``t.GetUserTimeline`` call works;
      * the local variable no longer shadows the ``time`` module.

    Args:
        id: User identifier, passed positionally to ``t.GetUserTimeline``.
        tweetid: Id of the reference tweet used for ``max_id``/``since_id``.
        tweettime: Creation-time string of the reference tweet, in a format
            accepted by ``timestamp``.
        delay: Maximum allowed distance in seconds between a tweet and the
            reference time.  Defaults to one day (24*60*60).
        t: An API object exposing ``GetUserTimeline``; each returned item
            must stringify to a JSON document with a ``'created_at'`` key.

    Returns:
        A list of tweet dicts: matches from the ``max_id`` request first,
        then matches from the ``since_id`` request.

    Raises:
        TypeError: If ``t`` is not supplied.
    """
    if t is None:
        raise TypeError("GetUsertweets() requires the API object 't'")

    reference = timestamp(tweettime)
    kept = []
    # First the page bounded above by the reference tweet (max_id), then the
    # page of tweets after it (since_id) -- same order as the original.
    for window in ({'max_id': tweetid}, {'since_id': tweetid}):
        statuses = t.GetUserTimeline(id, count=100, **window)
        for status in statuses or []:
            tweet = json.loads(str(status))
            created = timestamp(str(tweet['created_at']))
            if abs(created - reference) <= delay:
                kept.append(tweet)
    return kept

这里面主要有search和getusertweets两个函数。

里面都用到了max_id和since_id,并且反复地用,这样就能尽可能多地抓到tweets。

抱歉!评论已关闭.