$sourcehtml = file_get_contents($url);
$pattern = '|/$/("#replycounttop, #replycount001, #replycountbottom"/)/.attr/("innerHTML", (.*?)/);|ims';
if(preg_match($pattern,$sourcehtml,$match)){
$data['comment_num'] = $match[1];
}
if (preg_match('|HotTieArea/(/$/("#tieArea"/), "(.*?)", "(.*?)"|ims',$sourcehtml,$out)) {
$data['comment_url'] = "http://comment.news.163.com/{$out[2]}/{$out[1]}.html";
}
pr($data);
}
/**
 * Sina news smoke test.
 *
 * Downloads one Sina article, reads the `channel` and `newsid` values
 * embedded in the page, derives the comment-page URL from them and queries
 * the comment4.news.sina.com.cn count endpoint for the total number of
 * comments. The collected values are printed with print_r() inside <pre>.
 */
function test_sina(){
    echo "<pre>";
    // Sample articles from different Sina channels; only the last entry is fetched.
    $samples = array(
        "http://news.sina.com.cn/s/2010-11-18/121221492927.shtml",
        "http://mil.news.sina.com.cn/2010-11-18/0930619674.html",
        'http://finance.sina.com.cn/china/hgjj/20101129/01349021897.shtml',
        'http://tech.sina.com.cn/t/3g/2010-11-29/00054917255.shtml',
        'http://ent.sina.com.cn/j/2010-11-29/03063161010.shtml',
        'http://auto.sina.com.cn/car/2010-11-29/0749682428.shtml',
        'http://finance.sina.com.cn/roll/20101130/01549028210.shtml',
        'http://news.sina.com.cn/o/2010-12-01/103121564537.shtml',
        'http://edu.sina.com.cn/official/2010-12-05/1519277540.shtml',
        'http://travel.sina.com.cn/world/2010-12-02/0939148379.shtml',
        'http://sports.sina.com.cn/k/2010-12-06/10125353046.shtml',
        'http://sh.sina.com.cn/news/s/2010-12-06/0803164846.html',
        'http://gongyi.sina.com.cn/gyzx/2010-11-23/151521926.html',
    );
    $url = end($samples);

    // Start from an empty result so print_r() never sees an undefined variable.
    $data = array();

    $sourcehtml = file_get_contents($url);
    if (preg_match('|channel:"(.*?)",newsid:"(.*?)"|ims', $sourcehtml, $match)) {
        $channel = $match[1];
        $newsid = $match[2];
        $data['comment_url'] = "http://comment4.news.sina.com.cn/comment/skin/default.html?channel={$channel}&newsid={$newsid}&style=0";

        $cmt_num_url = "http://comment4.news.sina.com.cn/cgi-bin/comment/page_xml.cgi?type=P_TYPE_CMSG_JS&channel={$channel}&newsid={$newsid}&page=1";
        $cmt_num_content = file_get_contents($cmt_num_url);

        // \d+ (not "/d+") so the quoted total is captured as digits.
        if (preg_match('@Count.c_total.*?=.*?"(\d+)"@ims', $cmt_num_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    print_r($data);
}
/**
 * Tencent (qq.com) news smoke test.
 *
 * Downloads one article, extracts the numeric `cmt_id` from the page, then
 * asks sum.comment.gtimg.com.cn for the comment count of that id and builds
 * the comment-page URL. The collected values are printed with print_r().
 */
function test_qq(){
    echo "<pre>";
    // Sample articles from different qq.com channels; only the last entry is fetched.
    $samples = array(
        "http://news.qq.com/a/20101118/000311.htm",
        'http://finance.qq.com/a/20101129/000043.htm',
        'http://tech.qq.com/a/20101129/000021.htm',
        'http://ent.qq.com/a/20101128/000078.htm',
        'http://lady.qq.com/a/20101129/000033.htm',
        'http://sports.qq.com/a/20101128/000897.htm',
        'http://auto.qq.com/a/20101206/000026.htm',
        'http://edu.qq.com/a/20101206/000007.htm',
        'http://xian.qq.com/a/20101206/000005.htm',
        'http://cd.qq.com/a/20101206/000027.htm',
        'http://hb.qq.com/a/20101206/000686.htm',
        'http://2010.qq.com/a/20101031/000205.htm',
        'http://games.qq.com/a/20101206/000005.htm',
        'http://worldcup.qq.com/a/20100712/002214.htm',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // Without a comment id neither follow-up URL can be built, so everything
    // that depends on it lives inside this branch.
    if (preg_match('|cmt_id = (\d+)|ims', $sourcehtml, $match)) {
        $cmt_id = $match[1];

        $cmt_num_url = "http://sum.comment.gtimg.com.cn/php_qqcom/gsum.php?site=news&c_id=".$cmt_id;
        // Best effort: a failed count request leaves comment_num unset.
        $cmt_num_page = @file_get_contents($cmt_num_url);
        if ($cmt_num_page !== false && preg_match('|_cbSum\((\d+),|i', $cmt_num_page, $out)) {
            $data['comment_num'] = $out[1];
        }

        $data['comment_url'] = "http://comment5.news.qq.com/comment.htm?site=news&id=".$cmt_id;
    }
    print_r($data);
}
/**
 * People's Daily Online (people.com.cn) news smoke test.
 *
 * Downloads one article and looks for the comment-page link, first in a
 * `var link="...";` script assignment and otherwise in the "留言" anchor.
 * When a link is found the comment page is fetched and the number inside
 * `<span id="pn">` is taken as the comment count. The result is handed to
 * pr(), which is defined outside this chunk.
 */
function test_people(){
    // Sample articles; only the last active entry is fetched.
    $samples = array(
        "http://society.people.com.cn/GB/41158/13274129.html",
        //"http://ccnews.people.com.cn/GB/13281986.html",
        //"http://energy.people.com.cn/GB/13282125.html",
        //"http://media.people.com.cn/GB/40606/13257934.html",
        "http://finance.people.com.cn/GB/13282644.html",
        "http://env.people.com.cn/GB/13274621.html",
        "http://society.people.com.cn/GB/42733/13235258.html",
        "http://opinion.people.com.cn/GB/13280563.html",
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href attribute in the fallback pattern looks like
    // rich-text-editor residue rather than real page markup — confirm against a live page.
    if (preg_match('|var link="(.*?)";|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];
    } elseif (preg_match('|<td width="30"><a target=_blank href="(.*?)" mce_href="(.*?)" >留言</a>|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];
    }

    // Only follow the link when one of the patterns above produced it.
    if (isset($data['comment_url'])) {
        // Best effort: a failed fetch leaves comment_num unset.
        $comment_content = @file_get_contents($data['comment_url']);
        if ($comment_content !== false && preg_match('|<span id="pn">(\d+)</span>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        }
    }
    pr($data);
}
/**
 * Xinhuanet smoke test.
 *
 * Builds the view.home.news.cn comment-page URL for one article by appending
 * the article URL to the `url` query parameter, fetches that page and reads
 * the count out of the "评论共有N条" text. The result is handed to pr(),
 * which is defined outside this chunk.
 */
function test_xinhuanet() {
    // Sample articles; only the last entry is used.
    $samples = array(
        "http://news.xinhuanet.com/politics/2010-11/22/c_12800354.htm",
        "http://news.xinhuanet.com/politics/2010-11/22/c_12802434.htm",
        "http://news.xinhuanet.com/world/2010-11/22/c_12802205.htm",
        "http://news.xinhuanet.com/mil/2010-11/22/c_12801955.htm",
        "http://news.xinhuanet.com/world/2010-11/04/c_13591259.htm",
        "http://news.xinhuanet.com/legal/2010-11/22/c_12800225.htm",
        "http://news.xinhuanet.com/finance/2010-11/22/c_12802425.htm",
        "http://news.xinhuanet.com/lady/2010-11/22/c_12801667.htm",
        "http://news.xinhuanet.com/fortune/2010-11/19/c_12793392.htm",
    );
    $url = end($samples);

    $data = array();
    // The article URL is appended as-is (no URL-encoding), as before.
    $data['comment_url'] = "http://view.home.news.cn/comment?url=".$url;

    $comment_content = file_get_contents($data['comment_url']);
    if (preg_match('|评论共有(\d+)条|ims', $comment_content, $out)) {
        $data['comment_num'] = $out[1];
    }
    pr($data);
}
/**
 * china.com smoke test.
 *
 * Downloads one article, reads the comment-page link from the
 * `chan_comment_view` span, and takes the id passed to includeCommentnum()
 * in the page to build the URL of a `num.js` file on pl.news.china.com.
 * The `"commentnum"` value in that file is taken as the comment count
 * (0 when it is absent). The result is handed to pr(), which is defined
 * outside this chunk.
 */
function test_china(){
    // Sample articles from different china.com channels; only the last entry is fetched.
    $samples = array(
        "http://news.china.com/zh_cn/focus/shglhz/11088936/20101122/16254234.html",
        "http://news.china.com/zh_cn/domestic/945/20101122/16254203.html",
        "http://news.china.com/zh_cn/history/all/11025807/20101122/16255408.html",
        'http://digital.china.com/a/50054231.shtml',
        'http://auto.china.com/zh_cn/gouche/wg/11026346/20101203/16275704.html',
        'http://game.china.com/zh_cn/onlinegame/news/10003534/20101203/16276958.html',
        'http://military.china.com/zh_cn/important/11052771/20101203/16276490.html',
        'http://tech.china.com/zh_cn/news/tel/domestic/11066131/20101203/16275605.html',
        'http://sports.china.com/zh_cn/football/na/other/11062529/20101203/16276317.html',
        'http://culture.china.com/zh_cn/history/wenwu/11022845/20101203/16276689.html',
        'http://ent.china.com/zh_cn/star/news/11052670/20101129/16266438.html',
        'http://baobao.china.com/zh_cn/parent/jy/jrtt1/11077099/20101203/16276225.html',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href attribute looks like rich-text-editor residue
    // rather than real page markup — confirm against a live page.
    if (preg_match('|<span class="chan_comment_view"><a href="(.*?)" mce_href="(.*?)"|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];
    }

    if (preg_match("|includeCommentnum\('(.*?)'\);|ims", $sourcehtml, $out)) {
        $newid = $out[1];
        // The first three characters select the directory under /data/,
        // the remainder is split into path segments below.
        $syspath = substr($newid, 0, 3);
        $path = substr($newid, 3);

        // Left-pad with zeros. The number of zeros is derived from the length
        // of the full id (prefix included), exactly as before.
        // NOTE(review): padding based on strlen($path) may have been the
        // intent — confirm against the num.js URL scheme before changing.
        for ($i = strlen($newid); $i < 8; $i++) {
            $path = "0".$path;
        }

        // Example result: http://pl.news.china.com/data/cms/162/54/23/4num.js
        $js = "http://pl.news.china.com/data/{$syspath}/"
            .substr($path, 0, 3).'/'
            .substr($path, 3, 2).'/'
            .substr($path, 5, 2).'/'
            .substr($path, 7).'num.js';

        $js_content = file_get_contents($js);
        if (preg_match('|"commentnum":(\d+),|ims', $js_content, $outnum)) {
            $data['comment_num'] = $outnum[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}
/**
 * chinanews.com.cn smoke test.
 *
 * Downloads one article, extracts the comment-page link from the `noline`
 * anchor, fetches that page and reads the number inside the red bold span
 * as the comment count. The result is handed to pr(), which is defined
 * outside this chunk.
 */
function test_chinanews() {
    // Sample articles; only the last entry is fetched.
    $samples = array(
        "http://www.chinanews.com.cn/gn/2010/11-22/2671995.shtml",
        "http://www.chinanews.com.cn/gj/2010/11-22/2671636.shtml",
        "http://www.chinanews.com.cn/ty/2010/11-21/2670470.shtml",
        "http://www.chinanews.com.cn/it/2010/11-22/2670544.shtml",
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href / mce_style attributes look like
    // rich-text-editor residue rather than real page markup — confirm against a live page.
    if (preg_match('|<a class=noline href="(.*?)" mce_href="(.*?)"><span style="color:#15388A;" mce_style="color:#15388A;">|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match('|<span style="color:#FF0000;font-weight:bold;" mce_style="color:#FF0000;font-weight:bold;" >(\d+)</span>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        }
    }
    pr($data);
}
/**
 * Yesky smoke test.
 *
 * Downloads one article, reads the `site`, `type`, `referId` and `channelId`
 * script variables from the page, builds the comment.yesky.com URL from
 * them, fetches it and reads the count out of the "共N条" text (0 when that
 * text is absent). The result is handed to pr(), which is defined outside
 * this chunk. Nothing is returned.
 */
function test_yesky() {
    // Sample articles; only the last entry is fetched.
    $samples = array(
        "http://dc.yesky.com/475/11659475.shtml",
        "http://notebook.yesky.com/312/11670312.shtml",
        "http://cpu.yesky.com/466/11671466.shtml",
        "http://dc.yesky.com/415/11670415.shtml",
        'http://news.yesky.com/477/11677477.shtml',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    $pattern = "|var site = '(.*?)';.*?type = '(.*?)';.*?articletitle = '(.*?)';.*?referId = (\d+);.*?channelId = (\d+);|ims";
    if (preg_match($pattern, $sourcehtml, $match)) {
        // Group 3 (articletitle) is matched to anchor the pattern but not used.
        $site = $match[1];
        $type = $match[2];
        $referId = $match[4];
        $channelId = $match[5];

        $data['comment_url'] = "http://comment.yesky.com/more.htm?site={$site}&referId={$referId}&type={$type}&channelId={$channelId}&isValidate=1&articleUrl={$url}";

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match("| <td align='right' nowrap='nowrap'>.*?共(\d+)条|ims", $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}
/**
 * Demonstrates eval()-based variable interpolation, then runs test_yesky().
 *
 * Prints the template string before and after eval() has expanded the
 * `$string` and `$name` placeholders inside it, emits a <br />, calls
 * test_yesky() and passes its result to print_r().
 */
function test_eval(){
    // Both locals are read by the eval'd code below; they are not unused.
    $string = 'cup';
    $name = 'coffee';
    $str = 'This is a $string with my $name in it.';
    echo $str. "\n";

    // Re-evaluates the template as a double-quoted string so the placeholders
    // are interpolated. The input is a hard-coded literal; never feed external
    // data through eval().
    eval("\$str = \"$str\";");
    echo $str. "\n";
    echo "<br />";

    // test_yesky() prints its own output and has no return statement,
    // so $arr is null here.
    $arr = $this->test_yesky();
    print_r($arr);
}
/**
 * Sohu news comment-count smoke test.
 *
 * Downloads one article and handles two page layouts: when a `commentLink`
 * div is present, its link is fetched and the count is read from the
 * "(N条)" text; otherwise the "我来说两句" link is used and the count is
 * read from the comment_updateCountAll('N', ...) call in the article page
 * itself. The count defaults to 0 when a layout matched but no number was
 * found. The result is handed to pr(), which is defined outside this chunk.
 */
function test_sohu(){
    // Remove the execution time limit for the remote fetches.
    set_time_limit(0);

    // Sample articles from different Sohu channels; only the last entry is fetched.
    $samples = array(
        'http://news.sohu.com/20101126/n277939004.shtml',
        'http://women.sohu.com/20101129/n277887754.shtml',
        'http://money.sohu.com/20101129/n277974879.shtml',
        'http://stock.sohu.com/20101129/n277974701.shtml',
        'http://sports.sohu.com/20101129/n277975937.shtml',
        'http://business.sohu.com/20101129/n277982607.shtml',
        'http://fund.sohu.com/20101129/n277975935.shtml',
        'http://it.sohu.com/20101129/n277972131.shtml',
        'http://auto.sohu.com/20101203/n278069399.shtml',
        'http://cul.sohu.com/20101203/n278068617.shtml',
        'http://men.sohu.com/20101130/n278016966.shtml',
        'http://learning.sohu.com/20101130/n278013946.shtml',
        'http://gongyi.sohu.com/20101202/n278061618_1.shtml',
        'http://health.sohu.com/20101203/n278062656.shtml',
        'http://news.sohu.com/20101202/n278056978.shtml',
        'http://yule.sohu.com/20101203/n278066902.shtml',
        'http://chihe.sohu.com/20101201/n278033205.shtml',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href attributes look like rich-text-editor residue
    // rather than real page markup — confirm against a live page.
    if (preg_match('|<div class="commentLink" collection="Y"><a href="(.*?)" mce_href="(.*?)" target="_blank">|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match('|<em>\((\d+)条\)</em>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    } elseif (preg_match('|\[<a href="(.*?)" mce_href="(.*?)" target="_blank">我来说两句</a>|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        // In this layout the count is embedded in the article page itself.
        if (preg_match('|<script language="javascript">comment_updateCountAll\(\'(\d+)\',|ims', $sourcehtml, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}
/**
 * ifeng.com smoke test.
 *
 * Downloads one article, reads the `commentURL` script variable, fetches
 * that URL and takes the first `<b>N</b>` number on it as the comment count
 * (0 when none is found). The result is handed to pr(), which is defined
 * outside this chunk.
 */
function test_ifeng(){
    // Sample articles from different ifeng.com channels; only the last entry is fetched.
    $samples = array(
        'http://news.ifeng.com/world/special/chaoxianpaojihanguo/content-2/detail_2010_11/26/3242644_0.shtml',
        'http://tech.ifeng.com/3g/detail_2010_11/29/3260862_0.shtml',
        'http://house.ifeng.com/toutiao/detail_2010_11/29/3260887_0.shtml',
        'http://finance.ifeng.com/news/20101129/2966340.shtml',
        'http://ent.ifeng.com/idolnews/special/chaonvwangbei/content-1/detail_2010_11/29/3260477_0.shtml',
        'http://finance.ifeng.com/stock/qsch/20101201/2982947.shtml',
        'http://auto.ifeng.com/news/report/20101202/479834.shtml',
        'http://phtv.ifeng.com/program/jqgcs/detail_2010_12/02/3309963_0.shtml',
        'http://culture.ifeng.com/whrd/detail_2010_12/03/3331577_0.shtml',
        'http://media.ifeng.com/news/tradition/tv/detail_2010_12/03/3330239_0.shtml',
        'http://expo2010.ifeng.com/dongtai/detail_2010_11/01/2958589_0.shtml',
        'http://fashion.ifeng.com/art/fashion/detail_2010_12/03/3331196_0.shtml',
        'http://health.ifeng.com/longevity/tieshi/job/detail_2010_12/03/3332816_0.shtml',
        'http://edu.ifeng.com/news/detail_2010_12/03/3328672_0.shtml',
        'http://yayun2010.ifeng.com/chinanews/detail_2010_11/27/3252198_0.shtml',
        'http://games.ifeng.com/netgame/wow/hottopics/detail_2010_12/03/3330131_0.shtml',
        'http://gongyi.ifeng.com/news/detail_2010_12/03/3330698_0.shtml',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    if (preg_match("|var commentURL.*?=.*?'(.*?)'|ims", $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match('|<b>(\d+)</b>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}
/**
 * it168.com smoke test.
 *
 * Downloads one article, extracts the link that follows the "评论:" label,
 * fetches it and takes the first `<b>N</b>` number on that page as the
 * comment count (0 when none is found). The result is handed to pr(),
 * which is defined outside this chunk.
 */
function test_it168(){
    // Sample articles from different it168.com channels; only the last entry is fetched.
    $samples = array(
        'http://notebook.it168.com/a2010/1119/1128/000001128255.shtml',
        'http://tech.it168.com/a2010/1206/1134/000001134375.shtml',
        'http://datacenter.it168.com/a2010/1203/1133/000001133938.shtml',
        'http://cio.it168.com/a2010/1203/1133/000001133947.shtml',
        'http://server.it168.com/a2010/1202/1133/000001133449.shtml',
        'http://net.it168.com/a2010/1203/1134/000001134020.shtml',
        'http://safe.it168.com/a2010/1202/1133/000001133508.shtml',
        'http://vga.it168.com/a2010/1203/1133/000001133965.shtml',
        'http://power.it168.com/a2010/1201/1132/000001132899.shtml',
        'http://mb.it168.com/a2010/1203/1133/000001133921.shtml',
        'http://virtual.it168.com/a2010/1110/1124/000001124296.shtml',
        'http://lcd.it168.com/a2010/1203/1133/000001133746.shtml',
        'http://software.it168.com/a2010/1205/1134/000001134142.shtml',
        'http://oa.it168.com/a2010/1203/1133/000001133787.shtml',
        'http://security.it168.com/a2010/1206/1134/000001134259.shtml',
        'http://storage.it168.com/a2010/1206/1134/000001134417.shtml',
        'http://digital.it168.com/a2010/1202/1133/000001133389.shtml',
        'http://cloud.it168.com/a2010/1206/1134/000001134289.shtml',
        'http://wireless.it168.com/a2010/1014/1113/000001113476.shtml',
        'http://sound.it168.com/a2010/1202/1133/000001133109.shtml',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href attribute looks like rich-text-editor residue
    // rather than real page markup — confirm against a live page.
    if (preg_match('|评论:<a href="(.*?)" mce_href="(.*?)" target="_blank">|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match('|<b>(\d+)</b>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}
/**
 * ZOL (zol.com.cn) smoke test.
 *
 * Downloads one article, extracts the comment-page link that precedes the
 * `arti_comm_num` span, fetches it and reads the count out of the
 * "全部评论(N条)" text (0 when that text is absent). The result is handed
 * to pr(), which is defined outside this chunk.
 */
function test_zol(){
    // Sample articles from different zol.com.cn channels; only the last entry is fetched.
    $samples = array(
        'http://mobile.zol.com.cn/203/2032041.html',
        'http://news.zol.com.cn/196/1964372.html',
        'http://nb.zol.com.cn/205/2054458.html',
        'http://game.zol.com.cn/206/2064176.html',
        'http://dcdv.zol.com.cn/205/2053179.html',
        'http://mp3.zol.com.cn/205/2059590.html',
        'http://power.zol.com.cn/205/2053230.html',
    );
    $url = end($samples);

    $data = array();

    $sourcehtml = file_get_contents($url);
    // NOTE(review): the mce_href attribute looks like rich-text-editor residue
    // rather than real page markup — confirm against a live page.
    if (preg_match('|\[<a href="(.*?)" mce_href="(.*?)" class="huei12i"><span id="arti_comm_num">|ims', $sourcehtml, $match)) {
        $data['comment_url'] = $match[1];

        $comment_content = file_get_contents($data['comment_url']);
        if (preg_match('|全部评论<span class="f12">\((\d+)条\)</span>|ims', $comment_content, $out)) {
            $data['comment_num'] = $out[1];
        } else {
            $data['comment_num'] = 0;
        }
    }
    pr($data);
}