现在的位置: 首页 > 综合 > 正文

C# 获取网页标题title的代码

2018年02月06日 ⁄ 综合 ⁄ 共 15082字 ⁄ 字号 评论关闭

 

using System;
using System.Net;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Web;
using System.Web.SessionState;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.HtmlControls;
using System.Data;
using System.Data.OleDb;
using System.IO;
using System.Text;
using System.Globalization;
using System.Text.RegularExpressions;

namespace GETURL
  
{
   
/// <summary>
/// Code-behind page that downloads a user-supplied <c>http://</c> URL and reports
/// the host name / IP address, the document's &lt;title&gt; and whether the
/// document contains a &lt;meta http-equiv=... content=...&gt; declaration.
/// </summary>
/// <remarks>
/// NOTE(review): the page fetches whatever URL the visitor supplies from the
/// server side. Confirm that this endpoint is not reachable by untrusted users,
/// or restrict the hosts it may contact.
/// </remarks>
public class getText : System.Web.UI.Page
{
    // Controls written by this class; presumably declared in the .aspx markup
    // (not visible from this file).
    protected System.Web.UI.WebControls.Label label_mess;
    protected System.Web.UI.WebControls.Label content;
    protected System.Web.UI.WebControls.Label MetaUrl;
    protected System.Web.UI.WebControls.Label GetTitle;
    protected System.Web.UI.WebControls.Label label_Title;
    protected System.Web.UI.WebControls.Label startSpider;
    protected System.Web.UI.WebControls.TextBox searchurl;
    protected System.Web.UI.WebControls.Button urlButton;
    protected System.Web.UI.WebControls.Label txtsiteurl;

    // Host part of the URL: dot-separated labels of word characters and hyphens,
    // optionally followed by "?query" and/or a trailing dot.
    // The published source had '/' where '\' was intended, which made the
    // pattern unmatchable for any host.
    private static readonly Regex HostPattern =
        new Regex(@"^(\w+(-\w+)*)(\.(\w+(-\w+)*))*(\?\S*)?(|\.)$");

    // Dotted-quad IPv4 address. The dots are escaped so that only a literal '.'
    // separates the octets.
    private static readonly Regex IpPattern =
        new Regex(@"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$");

    // First <title ...>text</title in the document, case-insensitive, text may span lines.
    private static readonly Regex TitlePattern =
        new Regex(@"<title\b[^>]*>(.*?)</title", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>
    /// Runs the lookup automatically when the request carries a non-empty
    /// <c>url</c> query-string parameter.
    /// </summary>
    private void Page_Load(Object sender, EventArgs e)
    {
        string requestedUrl = Request.QueryString["url"];
        if (requestedUrl != null && requestedUrl != "")
        {
            searchurl.Text = requestedUrl;
            getHTTP(sender, e);
        }
    }

    /// <summary>
    /// Validates the URL in <c>searchurl</c>, downloads it and fills the result
    /// labels (<c>label_mess</c>, <c>content</c>, <c>GetTitle</c>, <c>MetaUrl</c>).
    /// </summary>
    /// <param name="sender">Event source; not used.</param>
    /// <param name="e">Event data; not used.</param>
    /// <remarks>
    /// No exception escapes from the download phase: failures are written to the
    /// console and shown on the page.
    /// </remarks>
    public void getHTTP(Object sender, EventArgs e)
    {
        startSpider.Text = "";
        GetTitle.Text = "";
        MetaUrl.Text = "";

        string aUrl = searchurl.Text;
        if (aUrl == "")
        {
            ShowValidationError("<hr>Please enter the URL");
            return;
        }

        txtsiteurl.Text = "--spider url'Result!";

        // "http://host/path" splits into { "http:", "", "host", "path", ... }.
        string[] txtspiderurl = aUrl.Split('/');

        if (txtspiderurl[0] != "http:")
        {
            ShowValidationError("<hr>Url form must match'http://' ahead!!");
            return;
        }

        if (txtspiderurl.Length < 3)
        {
            ShowValidationError("<hr>Please write the whole url text!");
            return;
        }

        if (txtspiderurl[2] == "" || txtspiderurl[1] != "")
        {
            ShowValidationError("<hr>Host Domain must Exists!!!");
            return;
        }

        string hosturl = txtspiderurl[2];

        try
        {
            if (!HostPattern.IsMatch(hosturl))
            {
                label_mess.Text = "";
                content.Text = "<hr>Host domain Wrong!";
                return;
            }

            // Created inside the try block so that a malformed URL is reported
            // like any other failure instead of escaping the handler.
            HttpWebRequest myReq = (HttpWebRequest)WebRequest.Create(aUrl);

            // Everything that originates from the visitor or from the remote
            // site is HTML-encoded before it is placed into a Label.
            label_mess.Text = "<hr>You spidering the site:<font color=red size='3'>"
                + HttpUtility.HtmlEncode(aUrl) + "</font><br>";

            bool isIpHost = IpPattern.IsMatch(hosturl);
            content.Text = DescribeHost(hosturl, isIpHost);

            string html = Download(myReq, ChooseEncoding(aUrl, hosturl, isIpHost));

            // An empty document leaves the title/meta labels blank.
            if (html.Length > 0)
            {
                GetTitle.Text = DescribeTitle(html);
                MetaUrl.Text = DescribeMeta(html);
            }
        }
        catch (WebException exp)
        {
            ReportFailure(exp);
            Console.WriteLine(exp.Status);
        }
        catch (Exception exp)
        {
            ReportFailure(exp);
        }
    }

    // Shows a validation message and clears the result area.
    private void ShowValidationError(string message)
    {
        label_mess.Text = message;
        content.Text = "";
    }

    // Logs a download failure and surfaces it on the page.
    private void ReportFailure(Exception exp)
    {
        Console.WriteLine("The Urls Spider has Time Out!,Try Again...");
        Console.WriteLine(exp.Message);
        content.Text = "The Urls Spider has Time Out!,Try Again...<br>";
        startSpider.Text = "Or you Check the Url is true you want Test!<br>";
    }

    // Builds the host summary: reverse lookup for an IP host, forward lookup otherwise.
    // NOTE(review): Dns.GetHostByAddress / Dns.GetHostByName are marked obsolete
    // from .NET 2.0 on; consider Dns.GetHostEntry once the target framework is confirmed.
    private static string DescribeHost(string hosturl, bool isIpHost)
    {
        if (isIpHost)
        {
            IPHostEntry entry = Dns.GetHostByAddress(hosturl);
            return "Host Name: <font color=red size='3'> "
                + HttpUtility.HtmlEncode(entry.HostName) + "</font><hr color='red'>";
        }

        System.Net.IPAddress[] addressList = Dns.GetHostByName(hosturl).AddressList;
        StringBuilder addresses = new StringBuilder();
        for (int i = 0; i < addressList.Length; i++)
        {
            if (i > 0)
            {
                // Separator so that several addresses do not run together.
                addresses.Append(", ");
            }
            addresses.Append(addressList[i].ToString());
        }

        return "Host domain:  <font color=red size='3'> "
            + HttpUtility.HtmlEncode(hosturl) + "</font><hr color='red'>"
            + "Host IP:&nbsp;<font color=red size=3>" + addresses.ToString() + "</font><hr color=red>";
    }

    // Picks the text encoding used to decode the response body. The rules are
    // host/extension heuristics with hard-coded hosts; they do not look at the
    // response's declared charset.
    private static Encoding ChooseEncoding(string aUrl, string hosturl, bool isIpHost)
    {
        Encoding utf8 = System.Text.Encoding.GetEncoding("utf-8");

        if (!isIpHost)
        {
            string lowerHost = hosturl.ToLower();
            if (lowerHost == "microsoft.com" || lowerHost == "www.microsoft.com")
            {
                return utf8;
            }
            return Encoding.Default;
        }

        if (hosturl != "127.0.0.1" && hosturl != "172.19.23.14")
        {
            return Encoding.Default;
        }

        // For the two special-cased hosts, static .htm/.html files use the
        // system default code page and everything else uses UTF-8.
        string pathPart = aUrl;
        if (aUrl.IndexOf("?") != -1)
        {
            pathPart = aUrl.Split('?')[0];
        }

        string[] dotParts = pathPart.Split('.');
        string extension = dotParts[dotParts.Length - 1].ToLower();
        if (extension == "htm" || extension == "html")
        {
            return Encoding.Default;
        }
        return utf8;
    }

    // Sends the request and returns the whole response body as text. The
    // response, stream and reader are released even when reading fails.
    private static string Download(HttpWebRequest request, Encoding encoding)
    {
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, encoding))
        {
            return reader.ReadToEnd();
        }
    }

    // Builds the title summary, or "No Title Document!" when no <title> element is found.
    private static string DescribeTitle(string html)
    {
        Match found = TitlePattern.Match(html);
        if (!found.Success)
        {
            return "No Title Document!";
        }

        // Decode first so entities already present in the remote title are not
        // double-encoded, then encode so remote markup cannot be injected.
        string title = HttpUtility.HtmlEncode(HttpUtility.HtmlDecode(found.Groups[1].Value));

        // Size is the character count of the decoded document divided by 1024.
        return "Spider Title :<font color='red' size='3'>" + title + "</font>--"
            + (html.Length / 1024) + "KB<hr color=red>";
    }

    // Returns "OK" when the span from the first to the last <meta ...> tag
    // contains both "http-equiv=" and "content=", "NO" when it does not, and
    // "Not Charset Code" when the document has no <meta tag at all.
    private static string DescribeMeta(string html)
    {
        // Work on the lower-cased text so upper-case tags/attributes are recognised.
        string lower = html.ToLower();

        int firstMeta = lower.IndexOf("<meta");
        if (firstMeta == -1)
        {
            return "Not Charset Code";
        }

        int lastMeta = lower.LastIndexOf("<meta");
        int closeOffset = lower.Substring(lastMeta + 1).IndexOf(">");

        // Spans from the first "<meta" through the '>' that closes the last one
        // (or just past the last '<' when no closing '>' exists).
        string metaBlock = lower.Substring(firstMeta, closeOffset + 2 + lastMeta - firstMeta);

        // Normalise spacing so "http-equiv =" / "content =" variants compare equal.
        string normalized = Regex.Replace(metaBlock, " +", "");
        normalized = Regex.Replace(normalized, "<meta", "<meta ");
        normalized = Regex.Replace(normalized, "http-equiv", " http-equiv");
        normalized = Regex.Replace(normalized, "content", " content");
        normalized = Regex.Replace(normalized, " content-type", "content-type");
        normalized = Regex.Replace(normalized, " +", " ");

        if (normalized.IndexOf("http-equiv=") != -1 && normalized.IndexOf("content=") != -1)
        {
            return "OK";
        }
        return "NO";
    }

    // WeisNet WebTools
}

}






/*---------------------------------------------------Power By WeisNet System-----------------------------------------------*/

抱歉!评论已关闭.