现在的位置: 首页 > 综合 > 正文

C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向

2013年08月31日 ⁄ 综合 ⁄ 共 5214字 ⁄ 字号 评论关闭
要让机器模拟上网,首先要解决HTTP请求与响应的问题。我们的URL加载器功能比较强:它考虑了编码和URL相对地址的解析(见RFC),可以POST数据,能处理HTML里的<!--include-->,也能处理<head>里的重定向,很好用。

以下代码随便用吧,咱们已经拿这段代码申请过著作权啦。

  1        /// <summary>
  2        /// 最基本的Url加载函数,其它重载函数均调用它
  3        /// </summary>
  4        /// <param name="url"></param>
  5        /// <param name="encoding"></param>
  6        /// <param name="postdata"></param>
  7        /// <param name="include">是否在客户端包含include文件</param>
  8        /// <param name="redirectioncounter">计算重定向的次数</param>
  9        /// <returns></returns>

 10        public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
 11        {
 12            string str;
 13
 14            string url=uo.Url;
 15            HttpWebRequest request;
 16            HttpWebResponse response;
 17
 18            //采用HTTP GET或者POST
 19            if (postdata == null)
 20                postdata = "";
 21            if (postdata.Length == 0)//HTTP GET
 22            {
 23                try
 24                {
 25                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 26                }

 27                catch
 28                {
 29                    return "";
 30                }

 31
 32                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";
 33
 34                //超时异常发生在这里
 35                try
 36                {
 37                    response = (HttpWebResponse)request.GetResponse();
 38                    //uo.Url = response.ResponseUri.ToString();
 39                }

 40                catch
 41                {
 42                    return "";
 43                }

 44                
 45                System.IO.Stream stream = response.GetResponseStream();
 46
 47                Encoding source;
 48                try
 49                {
 50                    source = Encoding.GetEncoding(encoding);
 51                }

 52                catch
 53                {
 54                    source = Encoding.UTF8;
 55                }

 56
 57                StreamReader sr = new StreamReader(stream, source);
 58                try
 59                {
 60                    str = sr.ReadToEnd();
 61                }

 62                catch 
 63                {
 64                    return "";
 65                }

 66                sr.Close();
 67                stream.Close();
 68            }

 69            else//HTTP POST
 70            {
 71                try
 72                {
 73                    ASCIIEncoding asciiencoding = new ASCIIEncoding();
 74                    byte[] bytes = asciiencoding.GetBytes(postdata);
 75
 76                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 77                    request.Method = "POST";
 78                    request.ContentType = "application/x-www-form-urlencoded";
 79                    request.ContentLength = postdata.Length;
 80
 81                    Stream poststream = request.GetRequestStream();
 82                    poststream.Write(bytes, 0, bytes.Length);
 83                    poststream.Close();
 84
 85                    response = (HttpWebResponse)request.GetResponse();
 86
 87                    StreamReader sr = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312"));
 88                    str = sr.ReadToEnd();
 89                    response.Close();
 90                }

 91                catch
 92                {
 93                    return "";
 94                }

 95            }

 96
 97            uo.Url = response.ResponseUri.ToString();
 98
 99            //在客户端包含include文件
100            if (include)
101            {
102                System.Text.RegularExpressions.Regex regex = new Regex(@"<!--/W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
103                MatchCollection mc = regex.Matches(str);
104                if (mc.Count > 0)
105                {
106                    System.Text.RegularExpressions.Regex urlregex = new Regex("(?<=/").*(?=/")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
107
108                    string[] segments = regex.Split(str);
109
110                    StringBuilder sb = new StringBuilder();
111                    sb.Append(segments[0]);
112                    for (int i = 1; i <= mc.Count; i++)
113                    {
114                        string s = mc[i - 1].Value;
115                        string newurl = urlregex.Match(s).Value;
116                        UrlOperation newuo = uo.Forward(newurl);
117                        string included = LoadUrl(ref newuo, encoding, ""true);
118                        sb.Append(included);
119                        sb.Append(segments[i]);
120                    }

121
122                    str = sb.ToString();
123                }

124            }

125
126            //页面重定向
127            string redirection=GetRedirection(str).Trim();
128            if (redirection.Length > 0&&redirectioncounter<5)
129            {
130                uo=uo.Forward(redirection);
131                return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
132            }

133            else
134                return str;
135        }

136

 

抱歉!评论已关闭.