现在的位置: 首页 > 综合 > 正文

Winista.HtmlParser

2013年09月05日 ⁄ 综合 ⁄ 共 8320字 ⁄ 字号 评论关闭

using Winista.Text.HtmlParser.Visitors;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Util;     // contains ParserException
using Winista.Text.HtmlParser.Filters;
using Winista.Text.HtmlParser.Tags;
public partial class CreateWeatherInfo : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
        if (!Page.IsPostBack)
        {
            try
            {
                Response.Expires=600;
                Server.ScriptTimeout = 600;
                DateTime beginTime = DateTime.Now;              
                string cityCode = "",htmlSource="";              
                ITag tempTag;
               
                string todayWeather="",tomWeather="",thirdWeather="";
                SqlParameter[] para = new SqlParameter[] {
                    new SqlParameter("@cityName", SqlDbType.NVarChar,50),                   

                    new SqlParameter("@todayWeather", SqlDbType.NVarChar,300),
                    new SqlParameter("@tomWeather", SqlDbType.NVarChar,300),
                    new SqlParameter("@thirdWeather", SqlDbType.NVarChar,300)
                };
                SqlDataReader read = DataOp.ExecuteReader(DataOp.connWeatherDemo, CommandType.StoredProcedure, "up_getCityList", null);
                while (read.Read())
                {
                    todayWeather="";
                    tomWeather="";
                    thirdWeather = "";
                    cityCode = read["status"].ToString().Trim();
                    htmlSource = Tools.GetWebContent("http://www.****.com/"+cityCode+".shtml");
                    Parser parser = Parser.CreateParser(htmlSource, "GBK");  //utf-8
                    Parser parserTable = Parser.CreateParser(htmlSource, "GBK");
                    NodeFilter filter;
                    filter = new AndFilter(new TagNameFilter("table"), new HasAttributeFilter("class", "yuBaoTable"));
                    NodeFilter filterDiv = new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "tqyb_new"));
                    NodeList nodeList = parser.Parse(filterDiv);
                    if (nodeList.Count > 0)
                    {
                        #region    div class="tqyb_new" 格式的页面
                        for (int i = 0; i < nodeList.Count; i++)
                        {
                            tempTag = nodeList[i] as ITag;
                            //Response.Write(tempTag.ToPlainTextString().Trim()+"  </br>");

                            if (i == 0)
                            {                               
                                string msg = "", today1 = "", night = "";
                                today1 = WeatherAllInfo(tempTag)[0];
                                night = WeatherAllInfo(tempTag)[1];
                                todayWeather = today1 + " \n " + night;
                                todayWeather = todayWeather.Replace("&nbsp;", "");
                                //Response.Write(msg + " " + today1 + " </br>" + night + "</br>");
                            }
                            else
                                if (i == 1)
                                {
                                    string msg = "", today1 = "", night = "";
                                    today1 = WeatherAllInfo(tempTag)[0];
                                    night = WeatherAllInfo(tempTag)[1];
                                    tomWeather = today1 + " \n " + night;
                                    tomWeather = tomWeather.Replace("&nbsp;", "");
                                    //Response.Write(msg + " " + today1 + " </br>" + night + "</br>");
                                }
                                else
                                    if (i == 2)
                                    {
                                        string msg = "", today1 = "", night = "";
                                        today1 = WeatherAllInfo(tempTag)[0];
                                        night = WeatherAllInfo(tempTag)[1];
                                        thirdWeather = today1 + " \n " + night;
                                        thirdWeather = thirdWeather.Replace("&nbsp;", "");
                                        //Response.Write(msg + " " + today1 + " </br>" + night + "</br>");
                                    }
                        }
                        //Response.Write("</br></br>");
                        #endregion                       
                    }
                    else
                    {
                       #region   table格式的页面
                       NodeList nodeList1 = parserTable.Parse(filter);
                       for (int i = 0; i < nodeList1.Count; i++)
                       {
                           tempTag = nodeList1[i] as ITag;
                           if (i == 0)
                           {                              
                               TableTag table = (TableTag)tempTag;//上面判断如果得到的第一个为table                               

                               Winista.Text.HtmlParser.Tags.TableRow[] tr = table.Rows;//得到该table所有的tr
                               foreach (Winista.Text.HtmlParser.Tags.TableRow r in tr)//遍历所有tr

                               {
                                   TableColumn[] tc = r.Columns;

                                   foreach (TableColumn column1 in tc)//遍历所有的td

                                   {
                                       todayWeather += column1.ToPlainTextString().Trim() + " ";

                                   }
                                   todayWeather += " \n ";
                               }
                           }
                           else
                               if (i == 1)
                               {
                                   TableTag table = (TableTag)tempTag;//上面判断如果得到的第一个为table

                                   Winista.Text.HtmlParser.Tags.TableRow[] tr = table.Rows;//得到该table所有的tr

                                   foreach (Winista.Text.HtmlParser.Tags.TableRow r in tr)//遍历所有tr

                                   {
                                       TableColumn[] tc = r.Columns;
                                       foreach (TableColumn column2 in tc)//遍历所有的td

                                       {
                                           tomWeather += column2.ToPlainTextString().Trim() + " ";
                                       }
                                       tomWeather += " \n ";
                                   }
                               }
                               else
                                   if (i == 2)
                                   {
                                       TableTag table = (TableTag)tempTag;//上面判断如果得到的第一个为table

                                       Winista.Text.HtmlParser.Tags.TableRow[] tr = table.Rows;//得到该table所有的tr

                                       foreach (Winista.Text.HtmlParser.Tags.TableRow r in tr)//遍历所有tr

                                       {
                                           TableColumn[] tc = r.Columns;
                                           foreach (TableColumn column3 in tc)//遍历所有的td

                                           {
                                               thirdWeather += column3.ToPlainTextString().Trim() + " ";
                                           }
                                           thirdWeather += " \n ";
                                       }
                                   }
                       }
                       //Response.Write(todayWeather + tomWeather + thirdWeather+"</br></br>");
#endregion                     
                    }                  
                   
                    para[0].Value = read["cityname_cn"].ToString().Trim();
                    para[1].Value = todayWeather;
                    para[2].Value = tomWeather;
                    para[3].Value = thirdWeather;
                    DataOp.ExecuteNonQuery(DataOp.connWeatherDemo, CommandType.StoredProcedure, "up_updateweatherinfo", para);
                }
                Response.Write("<br/> ok ,开始时间:" + beginTime.ToString());
                Response.Write("<br/>结束时间:" + DateTime.Now.ToString());               

                //添加最新更新时间
                DataOp.ExecuteNonQuery(DataOp.connWeatherDemo,CommandType.Text,"insert into weather_createTime(createTime) values('"+DateTime.Now.ToString()+"')",null);
                //Response.Write("<script language:javascript>javascript:window.opener=null;window.close();</script>");   //关闭IE6不提示
                //Response.Write("<script language='javascript'>window.open('','_top');window.top.close();  </script>");    //关闭IE7不提示
            }
            catch (Exception ex)
            {
                Response.Write(ex.Message);
            }
        }

 

抱歉!评论已关闭.