/// <summary>
/// Downloads the HTML page at <paramref name="uri"/> and re-serializes it as
/// indented XML by reading it through an <c>SgmlReader</c> (DocType "HTML")
/// and copying the nodes into an <see cref="XmlTextWriter"/>.
/// </summary>
/// <param name="uri">
/// Address of the page to download. When null or empty, a built-in default
/// address is used instead.
/// </param>
/// <param name="xpath">XPath expression; currently not used by this method.</param>
/// <returns>
/// The page content as indented XML text. If the download or the conversion
/// fails, the exception message is returned instead of throwing.
/// </returns>
private string GetWellFormedHTML(string uri, string xpath)
{
    try
    {
        // Fall back to the default page when the caller supplies no address.
        if (string.IsNullOrEmpty(uri))
        {
            uri = "http://www.ypshop.net/list--91-940-940--search-1.html";
        }

        // Download the raw page text, decoded as UTF-8.
        string strWebContent;
        using (WebClient webclient = new WebClient())
        {
            webclient.Encoding = Encoding.UTF8;
            strWebContent = webclient.DownloadString(uri);
        }

        // The using statements release every resource on both the success
        // and the failure path, so the catch block needs no cleanup code
        // (the previous cleanup dereferenced null and threw from the catch).
        using (StringReader input = new StringReader(strWebContent))
        using (StringWriter sw = new StringWriter())
        using (XmlTextWriter writer = new XmlTextWriter(sw))
        using (SgmlReader reader = new SgmlReader())
        {
            reader.DocType = "HTML";
            reader.InputStream = input;
            writer.Formatting = Formatting.Indented;

            // WriteNode copies the current node (with its subtree) and leaves
            // the reader positioned on the next sibling, so Read() must only
            // be called to get started or to step over top-level whitespace.
            // Calling Read() after every WriteNode would skip a node.
            reader.Read();
            while (!reader.EOF)
            {
                if (reader.NodeType == XmlNodeType.Whitespace)
                {
                    reader.Read();
                }
                else
                {
                    writer.WriteNode(reader, true);
                }
            }

            writer.Flush();
            return sw.ToString();
        }
    }
    catch (Exception exp)
    {
        // Existing contract: failures are reported through the return value.
        return exp.Message;
    }
}
#endregion