http://www.oschina.net/code/snippet_1026739_23103
我看好多人写着玩,下载下来没执行成功(我没看代码不知原因),然后自己写个玩玩 -_#
请自行在代码目录下创建 tieba 文件夹。
当然了 口味重的 可以把 贴吧地址 改成 LES 吧 或者 同志吧 之类的
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Download the pictures shown on the listing pages of a Baidu Tieba forum.

The listing pages are split into contiguous ranges; one worker thread per
range fetches each page, extracts the full-size picture URLs and stores every
picture in the ``tieba/`` directory next to the script.

The code runs unchanged on Python 2 and Python 3.
"""

import hashlib
import os
import random
import re
import sys
import time  # not used below; kept because the original script imported it
from threading import Thread

try:
    import urllib2  # Python 2
except ImportError:  # Python 3: urllib.request offers the same urlopen()
    import urllib.request as urllib2


class tieba(object):
    """Fetch listing pages of one forum and save the pictures they show."""

    # Listing URL prefix; the post offset is appended to it.
    url = None
    # Directory (with trailing slash) that receives the downloaded pictures.
    dirPath = None
    # Running MD5 object used to derive file names.
    __md5 = None

    # Captures the ``bpic`` attribute of <img> tags that carry the
    # ``src``/``original``/``bpic`` attribute triple, in that order.
    _IMG_PATTERN = r'<img src="[^"]*" original="[^"]*" bpic="([^"]*)"[^>]*\/>'

    def __init__(self):
        self.url = "http://tieba.baidu.com/f?kw=%BD%E3%CD%D1&tp=0&pn="
        # sys.path[0] is the script's directory when run as a script.
        self.dirPath = sys.path[0] + "/tieba/"
        self.__md5 = hashlib.md5()

    def getImages(self, page):
        """Return the picture URLs found on listing page number *page*.

        The ``pn`` offset sent to the server is ``page * 50`` (presumably
        50 posts per listing page -- confirm against the site).

        Raises whatever ``urlopen`` raises when the page cannot be fetched.
        """
        res = urllib2.urlopen(self.url + str(page * 50))
        try:
            html = res.read()
        finally:
            res.close()
        return self._parseImages(html)

    def _parseImages(self, html):
        """Return every ``bpic`` URL contained in the markup *html*."""
        if isinstance(html, bytes) and bytes is not str:
            # Python 3 delivers the body as bytes.  latin-1 maps each byte to
            # exactly one character, so the pattern matches byte for byte as
            # it does on Python 2, whatever the page's real encoding is.
            html = html.decode("latin-1")
        return re.findall(self._IMG_PATTERN, html, re.MULTILINE | re.DOTALL)

    def saveImg(self, images):
        """Download every URL in *images* into ``dirPath``.

        The directory is created when missing.  Raises whatever ``urlopen``
        or the file operations raise; pictures saved before the failure stay
        on disk.
        """
        if not images:
            return
        self._ensureDir()
        for imageUrl in images:
            fname = self._fileName(imageUrl)
            res = urllib2.urlopen(imageUrl)
            try:
                pic = res.read()
            finally:
                res.close()
            with open(fname, "wb") as f:
                f.write(pic)

    def _ensureDir(self):
        """Create ``dirPath`` unless it already exists."""
        if os.path.isdir(self.dirPath):
            return
        try:
            os.makedirs(self.dirPath)
        except OSError:
            # Another worker thread may have created it in the meantime.
            if not os.path.isdir(self.dirPath):
                raise

    def _fileName(self, imageUrl):
        """Return a fresh ``<dirPath><md5 hex>.jpg`` path for *imageUrl*.

        The digest is taken from a running hash fed with a random number plus
        the URL, so the same URL gets a different name on every call.
        """
        seed = str(random.randint(1, 10000)) + imageUrl
        if not isinstance(seed, bytes):
            seed = seed.encode("utf-8")  # hashlib only accepts bytes
        self.__md5.update(seed)
        return self.dirPath + self.__md5.hexdigest() + ".jpg"


class catch(Thread):
    """Worker thread that downloads an inclusive range of listing pages."""

    # First and last page number (both inclusive) handled by this thread.
    startPage = None
    endPage = None

    def __init__(self, start, end):
        Thread.__init__(self)
        self.startPage = start
        self.endPage = end

    def run(self):
        """Fetch and save every page from ``startPage`` to ``endPage``."""
        t = tieba()
        for i in range(self.startPage, self.endPage + 1):
            try:
                t.saveImg(t.getImages(i))
            except EnvironmentError as e:
                # Network and file-system errors: report the page and go on
                # instead of letting one failure abandon the rest of the range.
                sys.stderr.write("get page %d failed: %s\n" % (i, e))
                continue
            print("get page %d success" % i)
            sys.stdout.flush()


def splitPages(maxPage, threadSum):
    """Split pages ``0 .. maxPage - 1`` into inclusive ``(first, last)`` ranges.

    At most *threadSum* ranges are returned (never more than there are
    pages).  When the pages do not divide evenly, the leading ranges get one
    extra page each so that no page is left out.  Returns an empty list when
    either argument is not positive.
    """
    if maxPage <= 0 or threadSum <= 0:
        return []
    threadSum = min(threadSum, maxPage)
    base, extra = divmod(maxPage, threadSum)
    ranges = []
    first = 0
    for i in range(threadSum):
        size = base + (1 if i < extra else 0)
        ranges.append((first, first + size - 1))
        first += size
    return ranges


if __name__ == '__main__':
    maxPage = 500
    threadSum = 50
    for firstPage, lastPage in splitPages(maxPage, threadSum):
        catch(firstPage, lastPage).start()