现在的位置: 首页 > 综合 > 正文

又一采集器

2017年12月26日 ⁄ 综合 ⁄ 共 14325字 ⁄ 字号 评论关闭

又一采集器,呵呵

JDBC.java

package com.baoruan;
import java.sql.*;

/**
 * Opens a JDBC connection to the local MySQL {@code baoruan} database.
 *
 * <p>NOTE(review): the driver class, URL, user and password are hard-coded in
 * {@link #getConnection()}; consider moving them to configuration.
 */
public class JDBC {
	/** Result of the last successful {@link #getConnection()} call; stays {@code null} until one succeeds. */
	Connection con;

	/**
	 * Loads the MySQL driver class and then asks {@link DriverManager} for a connection.
	 *
	 * <p>Both steps are attempted independently: a missing driver class is reported via
	 * its stack trace but the connection attempt is still made. A failed connection
	 * attempt is also only reported via its stack trace.
	 *
	 * @return the value of {@link #con} after the attempt; {@code null} if no attempt
	 *         on this instance has succeeded
	 */
	public Connection getConnection() {
		final String driverClass = "com.mysql.jdbc.Driver";
		final String jdbcUrl = "jdbc:mysql://localhost/baoruan?useUnicode=true&characterEncoding=UTF-8";
		final String user = "root";
		final String password = "123456";

		// Step 1: make sure the driver class is loaded.
		try {
			Class.forName(driverClass);
			System.out.println("数据库驱动加载成功!");
		} catch (ClassNotFoundException driverMissing) {
			driverMissing.printStackTrace();
		}

		// Step 2: open the connection; the field is only overwritten on success.
		try {
			con = DriverManager.getConnection(jdbcUrl, user, password);
			System.out.println("数据库连接成功!");
		} catch (SQLException connectFailed) {
			connectFailed.printStackTrace();
		}

		return con;
	}

	/** Manual smoke test: tries to open one connection and prints the outcome. */
	public static void main(String[] args) {
		JDBC probe = new JDBC();
		probe.getConnection();
	}
}

JDBCConnect.java

package com.baoruan;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * Thin helper that runs raw SQL strings against the connection supplied by {@link JDBC}.
 *
 * <p>The connection and the last prepared statement are held in static fields, so
 * constructing a new {@code JDBCConnect} replaces the connection used by every
 * instance. The class does no SQL escaping: callers must pass complete, safe SQL.
 */
public class JDBCConnect {
	/** Connection shared by all instances; replaced each time the constructor runs. */
	static Connection con;
	/** The statement most recently prepared by {@link #read(String)} or {@link #update(String)}. */
	static PreparedStatement sql;

	/** Opens a new connection through {@link JDBC#getConnection()} and stores it in {@link #con}. */
	public JDBCConnect() {
		JDBC conn = new JDBC();
		con = conn.getConnection();
	}

	/**
	 * Executes a query.
	 *
	 * <p>The statement must stay open while the returned result set is in use, so it is
	 * NOT closed here on success; the caller should close the result set and
	 * {@code resultSet.getStatement()} when done. If the query fails the statement is
	 * closed here, because the caller receives nothing to release it through.
	 *
	 * @param sqlworld the complete SELECT statement to run
	 * @return the result set, or {@code null} if preparing or executing the query failed
	 *         (the error is printed)
	 */
	public ResultSet read(String sqlworld) {
		ResultSet res = null;
		PreparedStatement statement = null;
		try {
			statement = con.prepareStatement(sqlworld);
			sql = statement;
			res = statement.executeQuery();
		} catch (Exception e) {
			e.printStackTrace();
			System.out.println("数据库读取异常");
			// No result set is handed out, so nobody else can release this statement.
			closeQuietly(statement);
		}
		return res;
	}

	/**
	 * Executes an INSERT/UPDATE/DELETE statement. Failures are printed, not thrown.
	 * The statement is always closed, including when execution fails.
	 *
	 * @param sqlworld the complete SQL statement to run
	 */
	public void update(String sqlworld) {
		PreparedStatement statement = null;
		try {
			statement = con.prepareStatement(sqlworld);
			sql = statement;
			statement.executeUpdate();
			System.out.println("更新成功!");
		} catch (Exception e) {
			System.out.println("更新异常!");
			e.printStackTrace();
		} finally {
			closeQuietly(statement);
		}
	}

	/** Closes the statement if it is non-null; a failure to close is printed and otherwise ignored. */
	private static void closeQuietly(PreparedStatement statement) {
		if (statement == null) {
			return;
		}
		try {
			statement.close();
		} catch (Exception closeFailure) {
			closeFailure.printStackTrace();
		}
	}
}

SavePackageThread.java

package com.baoruan;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Runnable that downloads one HTTP resource and stores it in a local file.
 *
 * <p>Only HTTP(S) URLs are supported, because the connection is cast to
 * {@link HttpURLConnection}.
 */
public class SavePackageThread implements Runnable{
	/** Connect timeout used by {@link #getInputStream()}: five minutes. */
	private static final int CONNECT_TIMEOUT_MS = 60 * 1000 * 5;
	/** Buffer size of the stream returned by {@link #getInputStream()}. */
	private static final int STREAM_BUFFER_SIZE = 1024 * 8;
	/** Chunk size used when copying the download to disk. */
	private static final int COPY_BUFFER_SIZE = 4096;

	private String url;
	private String savePath;

	/** Creates a task without URL or target; {@link #run()} on such a task does nothing. */
	public SavePackageThread(){

	}

	/**
	 * @param url      HTTP(S) address of the resource to download
	 * @param savePath local file the resource is written to
	 */
	public SavePackageThread(String url, String savePath){
		this.url = url;
		this.savePath = savePath;
	}

	/**
	 * Makes sure the target file and its parent directory exist.
	 *
	 * @throws Exception if the file cannot be created (or {@code savePath} is {@code null})
	 */
	public void checkFile() throws Exception{
		File file = new File(savePath);
		File parentFile = file.getParentFile();
		// A bare file name has no parent; there is nothing to create in that case.
		if (parentFile != null && !parentFile.isDirectory()) {
			parentFile.mkdirs();
		}
		if (!file.exists()) {
			file.createNewFile();
		}
	}

	/**
	 * Opens a buffered stream on the configured URL using a GET request.
	 *
	 * @return the response stream, or {@code null} if the URL is empty or the
	 *         connection fails (the failure is printed)
	 */
	public InputStream getInputStream(){
		if (isBlank(this.url)) {
			return null;
		}
		try {
			HttpURLConnection httpConn = (HttpURLConnection) new URL(this.url).openConnection();
			httpConn.setRequestProperty("Connection", "Keep-Alive"); // keep the connection alive
			httpConn.setConnectTimeout(CONNECT_TIMEOUT_MS);
			// The original comment documented GET but the code sent POST; a plain download is a GET.
			httpConn.setRequestMethod("GET");
			httpConn.setAllowUserInteraction(true);
			return new BufferedInputStream(httpConn.getInputStream(), STREAM_BUFFER_SIZE);
		} catch (Exception ex) {
			ex.printStackTrace();
			return null;
		}
	}

	/**
	 * Downloads {@code url} into {@code savePath}.
	 *
	 * <p>Nothing is created when the URL or the path is empty. When the download fails
	 * the error is printed, all streams are closed, and the incomplete file is removed
	 * (a file that existed beforehand and was not yet opened for writing is left alone).
	 */
	@Override
	public void run() {
		if (isBlank(url) || isBlank(savePath)) {
			System.err.println("SavePackageThread: nothing to download (url=" + url
					+ ", savePath=" + savePath + ")");
			return;
		}

		File target = new File(savePath);
		boolean preExisting = target.exists();
		boolean outputOpened = false;
		boolean succeeded = false;
		HttpURLConnection connection = null;
		InputStream in = null;
		FileOutputStream out = null;
		try {
			checkFile();
			connection = (HttpURLConnection) new URL(url).openConnection();
			in = connection.getInputStream();
			// Open the output only after the response is available, so a failed request
			// does not truncate a file that is already there.
			out = new FileOutputStream(target);
			outputOpened = true;
			byte[] buffer = new byte[COPY_BUFFER_SIZE];
			int count;
			while ((count = in.read(buffer)) != -1) {
				out.write(buffer, 0, count);
			}
			out.flush();
			succeeded = true;
		} catch (Exception ex) {
			System.err.println("SavePackageThread: download failed: " + url + " -> " + savePath);
			ex.printStackTrace();
		} finally {
			closeQuietly(out);
			closeQuietly(in);
			if (connection != null) {
				connection.disconnect();
			}
		}

		// Remove the placeholder / partial file so that a failed download leaves no
		// empty or truncated file behind. Must happen after the streams are closed.
		if (!succeeded && (outputOpened || !preExisting)) {
			target.delete();
		}
	}

	/** Returns {@code true} for {@code null} and the empty string. */
	private static boolean isBlank(String value) {
		return value == null || "".equals(value);
	}

	/** Closes the resource if non-null; a failure to close is printed and otherwise ignored. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (Exception closeFailure) {
			closeFailure.printStackTrace();
		}
	}
}

Client.java

package com.baoruan;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;


/**
 * Scraper for game pages on baoruan.com.
 *
 * <p>For every game id in a range it fetches the WAP page, extracts name, version,
 * description, category, supported models, screenshots and the APK link, starts
 * background downloads through {@link SavePackageThread}, and records the game in the
 * {@code baoruan} table through {@link JDBCConnect}.
 */
public class Client {
	private static final int BUFFER_SIZE = 1024*80;

	private static final String SITE_ROOT = "http://baoruan.com";
	private static final String UPLOAD_ROOT = "C:/upload/";
	private static final String USER_AGENT =
			"Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2";
	/** Markup that precedes the download link; the three tabs are part of the page source. */
	private static final String DOWNLOAD_LINK_HEAD =
			"<img src=\"http://baoruan.com/images/down.gif\" alt='' />\t\t\t<a href=\"";
	/** Pages shorter than this are skipped by the crawl. */
	private static final int MIN_PAGE_LENGTH = 1000;
	/** Default crawl range: first id inclusive, end id exclusive. */
	private static final int DEFAULT_FIRST_GID = 1105572;
	private static final int DEFAULT_END_GID = 1109420;
	/** A "v" directly followed by a digit marks where the version starts inside the title. */
	private static final Pattern VERSION_MARKER = Pattern.compile("v\\d");
	/** An element without nested elements: opening tag, plain text, closing tag. */
	private static final Pattern PAIRED_TAG = Pattern.compile("<[^>]+>([^<]*)</[^>]+>");

	/** Local paths of the screenshots scheduled for download by this instance. */
	private List<String> imageUrlList = new ArrayList<String>();

	public List<String> getImageUrlList() {
		return imageUrlList;
	}
	public void setImageUrlList(List<String> imageUrlList) {
		this.imageUrlList = imageUrlList;
	}

	/**
	 * Crawls a range of game ids.
	 *
	 * @param args optional: {@code args[0]} = first game id (inclusive), {@code args[1]} =
	 *             end game id (exclusive); the built-in range is used for missing values
	 * @throws Exception if reading the database result fails or an argument is not a number
	 */
	public static void main(String[] args) throws Exception {
		int firstGid = DEFAULT_FIRST_GID;
		int endGid = DEFAULT_END_GID;
		if (args != null && args.length > 0) {
			firstGid = Integer.parseInt(args[0]);
		}
		if (args != null && args.length > 1) {
			endGid = Integer.parseInt(args[1]);
		}

		JDBCConnect connect = new JDBCConnect();
		for (int gid = firstGid; gid < endGid; gid++) {
			collectGame(connect, gid);
		}
	}

	/** Scrapes one game page and either refreshes the existing database row or inserts a new one. */
	private static void collectGame(JDBCConnect connect, int gid) throws Exception {
		Client client = new Client();
		String urlAddress = "http://baoruan.com/download/downpage/freegamedown/gid/" + gid + "/nopage/1/mid/12418";
		System.out.println("urlAddress---------------->" + urlAddress);

		// httpclient() returns null when the fetch fails; such ids are skipped like short pages.
		String page = client.httpclient(urlAddress);
		if (page == null || page.length() < MIN_PAGE_LENGTH) {
			return;
		}

		String title = client.indexOfContent(page, "<card id=\"main\" title=\"", "-宝软网");
		if (title == null || "".equals(title)) {
			return;
		}

		// Split "<name>v<version>" at the first "v<digit>" only, so a "v" inside the
		// game name does not cut the name short.
		String gameName = title;
		String version = "";
		Matcher marker = VERSION_MARKER.matcher(title);
		if (marker.find()) {
			gameName = title.substring(0, marker.start());
			version = title.substring(marker.start() + 1);
		}

		String lookup = "select * from baoruan where gameName = '" + escapeSql(gameName) + "'";
		ResultSet existing = connect.read(lookup);
		if (existing == null) {
			System.out.println("skip gid " + gid + ": lookup query failed");
			return;
		}
		boolean known = false;
		String id = "";
		try {
			// Iterate forward only; the id of the last matching row wins.
			while (existing.next()) {
				known = true;
				id = existing.getString("id");
			}
		} finally {
			closeQuietly(existing);
		}

		if (known) {
			// The game is already recorded: only fetch the package and refresh the version.
			startDownload(client.resolveRemoteApkUrl(page), UPLOAD_ROOT + id + "/" + id + ".apk");
			if (!"".equals(version)) {
				String sql = "update baoruan set version = '" + escapeSql(version) + "' where id = " + id;
				connect.update(sql);
			}
			return;
		}

		String description = client.indexOfContent(page, "介绍:", "<img src=\"http://baoruan.com/images/down.gif\" alt='' />");
		String model = client.indexOfContent(page, "当前适配机型:", "<br />");
		String categoryCopy = client.indexOfContent(page, "分类:", "<a href=\"/download/cutpic/show");

		String apkurl = UPLOAD_ROOT + gid + "/" + gid + ".apk";
		startDownload(client.resolveRemoteApkUrl(page), apkurl);
		String category = client.gameTypeindexof(categoryCopy);

		// Screenshots: follow the screenshot link only when the page actually has one.
		String screenshotSection = client.indexOfContent(page, "分类:", "截图</a>");
		String screenshotPath = client.indexOfContent(screenshotSection, "</a><a href=\"", "\">");
		if (screenshotPath != null) {
			String screenshotIndex = SITE_ROOT + screenshotPath;
			System.out.println("imageurl---------->" + screenshotIndex);
			client.getImageUrlFromURLFirst(screenshotIndex, UPLOAD_ROOT + gid + "/");
		}

		// Paths are stored relative to the drive root.
		String imageurl = client.getStringFromList(",").replaceAll("C:/", "");
		apkurl = apkurl.replaceAll("C:/", "");
		description = (description == null) ? "" : description.replaceAll("'", "‘");
		System.out.println("gameName------------->" + gameName);
		System.out.println("version------------->" + version);
		System.out.println("description------------->" + description);
		System.out.println("category------------->" + category);
		System.out.println("apkurl------------->" + apkurl);
		System.out.println("imageurl------------->" + client.getStringFromList(","));

		String sql = "insert into baoruan(gameName, category, imageurl, description, version, apkurl, model)values('"
				+ escapeSql(gameName) + "','" + escapeSql(category) + "','" + escapeSql(imageurl) + "','"
				+ escapeSql(description) + "','" + escapeSql(version) + "','" + escapeSql(apkurl) + "','"
				+ escapeSql(model) + "')";
		System.out.println("sql_----------------->" + sql);
		connect.update(sql);
	}

	/**
	 * Follows the "download" link of a game page and extracts the real package address
	 * from the {@code ontimer} attribute of the intermediate page.
	 *
	 * @return the package URL; {@code ""} if the link or the intermediate page is missing;
	 *         {@code null} if the intermediate page has no {@code ontimer} attribute
	 */
	private String resolveRemoteApkUrl(String page) {
		String remoteApkurl = "";
		String downloadPath = indexOfContent(page, DOWNLOAD_LINK_HEAD, "\">下载安装");
		if (downloadPath != null) {
			System.out.println("apkurlCopy------------->" + downloadPath);
			String downloadLink = SITE_ROOT + downloadPath;
			System.out.println("apkurlCopy---------------->" + downloadLink);
			String gamePackagePage = httpclient(downloadLink);
			System.out.println("gamePackagePage----------------->" + gamePackagePage);
			if (gamePackagePage != null) {
				remoteApkurl = indexOfContent(gamePackagePage, "ontimer=\"", "\"");
			}
		}
		return remoteApkurl;
	}

	/** Starts a background download, unless no remote address could be resolved. */
	private static void startDownload(String remoteUrl, String localPath) {
		if (remoteUrl == null || "".equals(remoteUrl)) {
			System.out.println("no package url found, nothing downloaded to " + localPath);
			return;
		}
		new Thread(new SavePackageThread(remoteUrl, localPath)).start();
	}

	/**
	 * Makes a value safe to embed between single quotes in the SQL built by this class:
	 * backslashes and single quotes are doubled. {@code null} becomes the empty string.
	 * NOTE(review): assumes MySQL's default backslash-escape mode; a parameterized API
	 * on JDBCConnect would be the proper fix.
	 */
	private static String escapeSql(String value) {
		if (value == null) {
			return "";
		}
		return value.replace("\\", "\\\\").replace("'", "''");
	}

	/** Closes a result set and the statement that produced it; failures are printed only. */
	private static void closeQuietly(ResultSet resultSet) {
		java.sql.Statement owner = null;
		try {
			owner = resultSet.getStatement();
		} catch (Exception ex) {
			ex.printStackTrace();
		}
		try {
			resultSet.close();
		} catch (Exception ex) {
			ex.printStackTrace();
		}
		if (owner != null) {
			try {
				owner.close();
			} catch (Exception ex) {
				ex.printStackTrace();
			}
		}
	}

	/**
	 * Strips paired HTML tags, keeping the text between them, e.g.
	 * {@code <a href="x">RPG</a>} becomes {@code RPG}. Nested elements are unwrapped from
	 * the inside out; unpaired tags such as {@code <br />} are left untouched.
	 *
	 * @param content markup to clean, may be {@code null}
	 * @return the cleaned text; {@code null} or {@code ""} are returned unchanged
	 */
	public String gameTypeindexof(String content) {
		if (content == null || "".equals(content)) {
			return content;
		}
		Matcher m = PAIRED_TAG.matcher(content);
		// "$1" refers to the captured inner text, so "$" or "\" inside that text is
		// copied literally rather than being read as part of the replacement pattern.
		while (m.find()) {
			content = m.replaceAll("$1");
			m = PAIRED_TAG.matcher(content);
		}
		return content;
	}

	/**
	 * Returns the text between the first occurrence of {@code head} and the first
	 * occurrence of {@code footer} after it (plain substring search, no regex).
	 *
	 * @return the enclosed text, or {@code null} if any argument is {@code null} or
	 *         either marker is not found
	 */
	public String indexOfContent(String content, String head, String footer) {
		if (content == null || head == null || footer == null) {
			return null;
		}
		int headIndex = content.indexOf(head);
		if (headIndex < 0) {
			return null;
		}
		int start = headIndex + head.length();
		int end = content.indexOf(footer, start);
		if (end < 0) {
			return null;
		}
		return content.substring(start, end);
	}

	/**
	 * Fetches a page with Commons HttpClient (GET, UTF-8, default retry handler).
	 *
	 * @param url address of the page
	 * @return the response body with line breaks removed, or {@code null} if the request
	 *         fails or the status is not 200 (the problem is printed)
	 */
	public String httpclient(String url) {
		GetMethod getmethod = null;
		try {
			HttpClient httpclient = new HttpClient();
			httpclient.getParams().setParameter(HttpMethodParams.USER_AGENT, USER_AGENT);
			getmethod = new GetMethod(url);
			// Default recovery policy of the library: retry a failed connection up to 3 times.
			getmethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
					new DefaultHttpMethodRetryHandler());
			getmethod.getParams().setParameter(
					HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF-8");
			int code = httpclient.executeMethod(getmethod);
			if (code != HttpStatus.SC_OK) {
				System.err.println("网页读取失败! status=" + code + " url=" + url);
				return null;
			}
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					getmethod.getResponseBodyAsStream(), "UTF-8"));
			StringBuilder body = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				body.append(line);
			}
			return body.toString();
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		} finally {
			// Release the connection on every path, not only after a successful read.
			if (getmethod != null) {
				getmethod.releaseConnection();
			}
		}
	}

	/**
	 * Opens a buffered GET stream on the given address.
	 *
	 * @return the stream, or {@code null} if the connection cannot be opened
	 */
	public BufferedInputStream getInputStream(String urlAddress){
		try {
			HttpURLConnection httpConn = (HttpURLConnection) new URL(urlAddress).openConnection();
			httpConn.setRequestProperty("Connection", "Keep-Alive");  // keep the connection alive
			httpConn.setConnectTimeout(60 * 1000 * 5);                // connect timeout: 5 minutes
			httpConn.setRequestMethod("GET");
			httpConn.setAllowUserInteraction(true);
			return new BufferedInputStream(httpConn.getInputStream(), Client.BUFFER_SIZE);
		} catch (Exception ex) {
			System.err.println("cannot open " + urlAddress + ": " + ex);
			return null;
		}
	}

	/**
	 * Reads everything from the reader, dropping line terminators.
	 *
	 * @param reader source; any {@link Reader} is accepted and buffered if necessary
	 * @return the text read so far (a read error is printed and ends reading), or
	 *         {@code null} if {@code reader} is {@code null}
	 */
	public String getStringFromReader(Reader reader){
		if (reader == null) {
			return null;
		}
		BufferedReader br = (reader instanceof BufferedReader)
				? (BufferedReader) reader
				: new BufferedReader(reader);
		StringBuilder sb = new StringBuilder();
		try {
			String line;
			while ((line = br.readLine()) != null) {
				sb.append(line);
			}
		} catch (Exception ex) {
			ex.printStackTrace();
		}
		return sb.toString();
	}

	/**
	 * Fetches the screenshot index page and hands every screenshot page linked from it
	 * to {@link #getImageUrlFromURL(String, String)}.
	 *
	 * @param urlAddress address of the screenshot index page
	 * @param savePath   directory prefix (ending with a separator) for the image files
	 */
	public void getImageUrlFromURLFirst(String urlAddress,String savePath){
		if (urlAddress == null || "".equals(urlAddress)) {
			return;
		}
		String content = httpclient(urlAddress);
		if (content == null || "".equals(content)) {
			return;
		}
		System.out.println("content------------>" + content);
		String urlSpan = indexOfContent(content, ".jpg\" /><br />", "图片经过压缩");
		if (urlSpan == null || "".equals(urlSpan)) {
			return;
		}
		String[] urlList = urlSpan.split("<a href = \"");
		if (urlList.length <= 1) {
			return;
		}
		for (String url : urlList) {
			// Fragments this short cannot hold a link (e.g. the text before the first anchor).
			if (url.length() < 10) {
				continue;
			}
			int hrefEnd = url.indexOf("\">");
			String pagePath = (hrefEnd >= 0) ? url.substring(0, hrefEnd) : url;
			System.out.println(pagePath);
			getImageUrlFromURL(SITE_ROOT + pagePath, savePath);
		}
	}

	/**
	 * Fetches one screenshot page, starts a background download of the JPEG shown on it
	 * and records the local file name in the image list.
	 *
	 * @param urlAddress address of the screenshot page
	 * @param savePath   directory prefix (ending with a separator) for the image file
	 */
	public void getImageUrlFromURL(String urlAddress,String savePath){
		if (urlAddress == null || "".equals(urlAddress)) {
			return;
		}
		String content = httpclient(urlAddress);
		if (content == null || "".equals(content)) {
			return;
		}
		System.out.println("content------------>" + content);
		String hotaddress = indexOfContent(content, "-=截图欣赏=-<br /><img src=\"", "\" />");
		if (hotaddress == null || "".equals(hotaddress)) {
			return;
		}

		if (hotaddress.indexOf(".jpg") > 0) {
			String imageUrl = nextFreeImageName(savePath);
			new Thread(new SavePackageThread(hotaddress, imageUrl)).start();
			this.imageUrlList.add(imageUrl);
		}
	}

	/**
	 * Like {@link #getImageName(String)} but also skips names already handed out by this
	 * instance: the download thread creates the file asynchronously, so a name that is
	 * not on disk yet may nevertheless be taken.
	 */
	private String nextFreeImageName(String savePath) {
		int index = 1;
		String candidate = savePath + index + ".jpg";
		while (new File(candidate).exists()
				|| (imageUrlList != null && imageUrlList.contains(candidate))) {
			index++;
			candidate = savePath + index + ".jpg";
		}
		return candidate;
	}

	/**
	 * Returns the first name of the form {@code savePath + n + ".jpg"} (n = 1, 2, ...)
	 * for which no file exists yet.
	 */
	public String getImageName(String savePath){
		int index = 1;
		while (new File(savePath + index + ".jpg").exists()) {
			index++;
		}
		return savePath + index + ".jpg";
	}

	/**
	 * Concatenates the recorded image paths, appending {@code joinSign} after every
	 * entry (so a non-empty result ends with the separator).
	 */
	public String getStringFromList(String joinSign){
		StringBuilder sb = new StringBuilder();
		if (this.imageUrlList == null) {
			return sb.toString();
		}
		for (String str : this.imageUrlList) {
			sb.append(str).append(joinSign);
		}
		return sb.toString();
	}
}

抱歉!评论已关闭.