现在的位置: 首页 > 综合 > 正文

用 HtmlParser 获取 HTML 网页中 Form 各元素的名称/值对

2013年12月04日 ⁄ 综合 ⁄ 共 2135字 ⁄ 字号 评论关闭

1、下载 HtmlParser 包,地址:http://prdownloads.sourceforge.net/htmlparser/htmlparser1_6_20060610.zip?download

2、class FormElements {
 // Simple holder describing one form control found by httpParser:
 // its name, its current value and what kind of control it is.

 // Control name; httpParser fills it from the tag's "name" attribute.
 private String _elementName;

 // Control value; httpParser stores the "value" attribute for <input> tags
 // and the text of the selected <option> for <select> tags.
 private String _elementValue;

 // Control kind; the entries httpParser returns carry "select" or "input".
 private String _elementType;
 // NOTE(review): the remainder of the class is elided in the article; presumably
 // it holds the set_elementName / set_elementValue / set_elementType accessors
 // (and matching getters) that httpParser calls -- confirm against the full source.
……

}
 public ArrayList<FormElements> httpParser(String content) {
  ArrayList<FormElements> ret = new ArrayList<FormElements>();

  Parser myParser;
  NodeList nodeList = null;

  myParser = Parser.createParser(content, "GBK");

  NodeFilter inputFilter = new NodeClassFilter(InputTag.class);
  NodeFilter selectFilter = new NodeClassFilter(SelectTag.class);

  OrFilter lastFilter = new OrFilter();
  lastFilter
    .setPredicates(new NodeFilter[] { selectFilter, inputFilter });
  try {
   nodeList = myParser.parse(lastFilter);
  } catch (ParserException e) {
   e.printStackTrace();
  }

  Node[] nodes = nodeList.toNodeArray();

  for (int i = 0; i < nodes.length; i++) {
   Node anode = (Node) nodes[i];
   FormElements fe = new FormElements();
   if (anode instanceof SelectTag) {
    SelectTag selectnode = (SelectTag) anode;
    Vector v = selectnode.getAttributesEx();

    NodeList nl = selectnode.getChildren();
    Node[] nl_nodes = nl.toNodeArray();
    int optNum = 0;
    String select_value = "";
    for (int j = 0; j < nl_nodes.length; j++) {
     Node optnode = (Node) nl_nodes[j];
     if (optnode instanceof OptionTag) {
      optNum++;
      OptionTag opttag = (OptionTag) optnode;
      Vector vv = opttag.getAttributesEx();
      if (vv.toString().indexOf("selected") != -1)
       select_value = opttag.getOptionText();
     }
    }
    fe.set_elementName(selectnode.getAttribute("name"));
    fe.set_elementValue(select_value);
    fe.set_elementType("select");

   } else if (anode instanceof InputTag) {
    InputTag inputnode = (InputTag) anode;
    Vector v = inputnode.getAttributesEx();
    if ((v.toString().indexOf("type=checkbox") != -1)
      && (v.toString().indexOf("checked") == -1)) {
     fe.set_elementType("checkbox");
     continue;
    } else if ((v.toString().indexOf("type=radio") != -1)
      && (v.toString().indexOf("checked") == -1)) {
     fe.set_elementType("radio");
     continue;
    } else {
     fe.set_elementType("input");
    }
    fe.set_elementName(inputnode.getAttribute("name"));
    fe.set_elementValue(inputnode.getAttribute("value"));
   }
   ret.add(fe);
  }

  return ret;
 }

抱歉!评论已关闭.