现在的位置: 首页 > 综合 > 正文

提取网页的 HTML

2013年03月05日 ⁄ 综合 ⁄ 共 2064字 ⁄ 字号 评论关闭

这是从网上找的，感觉有用就放在这儿！

using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;
namespace MyCsStudy
{
    class Program
    {
        /// <summary>
        /// 简单网络爬虫程序
        /// </summary>
        /// <param name="url"></param>
        /// <param name="charset">编码 可以为空</param>
        /// <returns></returns>
        public static string Fetch(string url, string charset)
        {
            Encoding encoding;
            HttpWebRequest request;
            HttpWebResponse response = null;
            Stream resStream = null;
            StreamReader sr = null;
            string result = string.Empty;
            try
            {
                request = (HttpWebRequest)HttpWebRequest.Create(url);
                response = (HttpWebResponse)request.GetResponse();
                resStream = response.GetResponseStream();
                if (!string.IsNullOrEmpty(charset))
                {
                    encoding = Encoding.GetEncoding(charset);
                }
                else if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    encoding = Encoding.GetEncoding(response.CharacterSet);
                }
                else
                {
                    encoding = Encoding.Default;
                }
                sr = new StreamReader(resStream, encoding);
                result = sr.ReadToEnd();
            }
            //catch (Exception ex)         
            //{            
            //    throw ex;
            //}           
            finally
            {
                if (sr != null)
                {
                    sr.Close();
                }
                if (resStream != null)
                {
                    resStream.Close();
                }
                if (response != null)
                {
                    response.Close();
                }
            }
            return result;
        }

        static void Main(string[] args)
        {
            stringwebSite=@"http://www.google.cn"; //这里url必须带上协议
            string strHTML = Fetch(webSite,null);
           
            Console.Write(strHTML);
            Console.ReadLine();
        }
    }
}

 

抱歉!评论已关闭.