现在的位置: 首页 > 综合 > 正文

C#清除html标记

2013年09月05日 ⁄ 综合 ⁄ 共 1141字 ⁄ 字号 评论关闭

private string StripHTML(string strHtml)
   {
    string [] aryReg ={
          @"<script[^>]*?>.*?</script>",

          @"<(///s*)?!?((/w+:)?/w+)(/w+(/s*=?/s*(([""'])(//[""'tbnr]|[^/7])*?/7|/w+)|.{0})|/s)*?(///s*)?>",
          @"([/r/n])[/s]+",
          @"&(quot|#34);",
          @"&(amp|#38);",
          @"&(lt|#60);",
          @"&(gt|#62);",
          @"&(nbsp|#160);",
          @"&(iexcl|#161);",
          @"&(cent|#162);",
          @"&(pound|#163);",
          @"&(copy|#169);",
          @"&#(/d+);",
          @"-->",
          @"<!--.*/n"
        
         };

    string [] aryRep = {
           "",
           "",
           "",
           "/"",
           "&",
           "<",
           ">",
           " ",
           "/xa1",//chr(161),
           "/xa2",//chr(162),
           "/xa3",//chr(163),
           "/xa9",//chr(169),
           "",
           "/r/n",
           ""
          };

    string newReg =aryReg[0];
    string strOutput=strHtml;
    for(int i = 0;i<aryReg.Length;i++)
    {
     Regex regex = new Regex(aryReg[i],RegexOptions.IgnoreCase );
     strOutput = regex.Replace(strOutput,aryRep[i]);
    }

    strOutput.Replace("<","");
    strOutput.Replace(">","");
    strOutput.Replace("/r/n","");

    return strOutput;
   }

 

抱歉!评论已关闭.