现在的位置: 首页 > 综合 > 正文

从源代码中提取或过滤网页标签

2013年09月11日 ⁄ 综合 ⁄ 共 1626字 ⁄ 字号 评论关闭

1.删除非图片标签

先把 img 标签的起止符号替换成特定的占位字符,再把其余所有 HTML 标签(<...>)过滤掉,最后把占位字符还原成 img 标签。

// Strip every HTML tag from `html` except <img ...> tags.
// Approach: temporarily swap the delimiters of each img tag for sentinel
// characters, remove everything that still looks like a tag, then swap the
// sentinels back.
// NOTE(review): assumes the input never contains the sentinel characters
// "♂" / "♀" itself; any such character would be turned into "<img" / ">".
html = html.replace(/<img/gi, "♂");          // mark the opening "<img" (any letter case)
html = html.replace(/(♂[^>]*)>/g, "$1♀");    // mark the ">" that closes each marked img tag
alert(html);                                  // debug: markup with img tags protected
html = html.replace(/<[^>]*>/g, " ");        // drop every remaining "<...>" tag
alert(html);                                  // debug: only sentinel-protected img tags remain
html = html.replace(/♂/g, "<img");           // restore "<img"
html = html.replace(/♀/g, ">");              // restore ">"

 

2.正则表达式过滤图片标签 自己所写

Function RemoveImg(ByVal str As String) As String
    ' Returns str with every <img ...> tag (matched case-insensitively)
    ' replaced by a single space.
    Dim imgTagMatcher
    Set imgTagMatcher = New RegExp
    With imgTagMatcher
        .Pattern = "<img[^>]*>"   ' "<img" up to the first ">"
        .Global = True            ' replace all occurrences, not just the first
        .IgnoreCase = True
        RemoveImg = .Replace(str, " ")
    End With
End Function

 

Function RemoveScript(ByVal str As String) As String
    ' Strips script blocks: returns str with every <script ...>...</script>
    ' element (matched case-insensitively) replaced by a single space.
    Dim re As RegExp
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True
    ' [\w\W] matches any character including line breaks. The lazy "*?" stops
    ' at the nearest closing tag, so text between two separate script blocks
    ' is preserved.
    re.Pattern = "<script\b[\w\W]*?</script\s*>"
    RemoveScript = re.Replace(str, " ")
End Function

Function RemoveEmbed(ByVal str As String) As String
    ' Strips embedded multimedia: returns str with every
    ' <embed ...>...</embed> element (matched case-insensitively) replaced by
    ' a single space.
    ' Limitation: an <embed> tag that has no closing </embed> is not matched.
    Dim re As RegExp
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True
    ' [\w\W] matches any character including line breaks. The lazy "*?" stops
    ' at the nearest closing tag, so text between two separate embed elements
    ' is preserved.
    re.Pattern = "<embed\b[\w\W]*?</embed\s*>"
    RemoveEmbed = re.Replace(str, " ")
End Function

Function RemoveObject(ByVal str As String) As String
    ' Strips embedded multimedia: returns str with every
    ' <object ...>...</object> element (matched case-insensitively) replaced
    ' by a single space.
    Dim re As RegExp
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True
    ' [\w\W] matches any character including line breaks. The lazy "*?" stops
    ' at the nearest closing tag, so text between two separate object elements
    ' is preserved.
    re.Pattern = "<object\b[\w\W]*?</object\s*>"
    ' Assign to this function's own name so the filtered text is returned.
    RemoveObject = re.Replace(str, " ")
End Function

 

3.字符串遍历过滤图片标签

Private Function RemoveImg(ByVal str As String) As String
    ' Removes image tags by string traversal: returns str with every
    ' "<img ...>" tag deleted, matching "<img" case-insensitively.
    ' A trailing "<img" that has no closing ">" is left untouched.
    Dim i As Long
    Dim closePos As Long
    Dim tempSource As String
    Dim tempStr As String
    Dim img() As String

    tempSource = str
    ' Element 0 is the text before the first "<img"; every later element
    ' starts right after one "<img" occurrence.
    img = Split(str, "<img", -1, vbTextCompare)
    ' Run through UBound(img) inclusive so the last image is removed too.
    For i = 1 To UBound(img)
        closePos = InStr(1, img(i), ">") ' end of this <img> tag
        If closePos > 0 Then
            tempStr = Left$(img(i), closePos - 1)
            tempSource = Replace(tempSource, "<img" & tempStr & ">", "", 1, -1, vbTextCompare)
        End If
    Next
    RemoveImg = tempSource
End Function

 

4.操作页面元素

// Give every <input> element on the page a grey (#CCCCCC) background.
// The collection is looked up once and its own length bounds the loop;
// `document.getElementsByTagName.length` is the function's declared
// parameter count (1), not the number of matching elements.
var inputs = document.getElementsByTagName("input");
for (var i = 0; i < inputs.length; i++) {
    inputs[i].style.background = "#CCCCCC";
}

抱歉!评论已关闭.