现在的位置: 首页 > 综合 > 正文

python解析xml模块

2013年02月06日 ⁄ 综合 ⁄ 共 5628字 ⁄ 字号 评论关闭
<?xml version="1.0" encoding="utf-8" ?>
<root>
<childs>
<child name='first' >1</child>
<child value="2">2</child>
</childs>
</root>

第一种方式,自动遍历所有节点:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
from xml.sax.handler import ContentHandler
from xml.sax import parse
         

class TestHandle(ContentHandler):
    """SAX handler that traces parse events and collects character data.

    Every start tag, end tag and text chunk is echoed to stdout, and each
    text chunk is additionally appended to the list supplied by the caller.
    The ``print(...)`` calls take a single pre-formatted string so the
    output is the same on Python 2 and Python 3.
    """

    def __init__(self, inlist):
        """Store the caller-owned list that will receive the text chunks.

        inlist: a mutable list; ``characters`` appends to it in document
            order.
        """
        # Chain to the base class so the state it sets up itself (the
        # document locator slot) exists on this instance too.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that has just been closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``.

        Whitespace between tags is reported as character data as well, so
        the collected list also contains the newline chunks.
        """
        print('chars %s' % chars)
        self.inlist.append(chars)
            
            
if __name__ == '__main__':
    # Parse test.xml with the tracing handler, then dump every text chunk
    # that the handler collected along the way.
    collected = []
    handler = TestHandle(collected)
    parse('test.xml', handler)
    print(collected)

结果:

name: root attrs: []
chars 

name: childs attrs: []
chars 

name: child attrs: [u'name']
chars 1
endname child
chars 

name: child attrs: [u'value']
chars 2
endname child
chars 

endname childs
chars 

endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

第二种:获取根节点,按需查找指定节点:

#!/usr/bin/env python  
# -*- coding: utf-8 -*-  
from xml.dom import minidom  
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
    <request name='first'>/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
'''
def doxml(xmlstr):
    """Parse an XML string with minidom and print a walk over the root's children.

    Prints the whole document, then the root element, then every direct
    child of the root. For element children it also prints the ``name``
    attribute, the tag name, the number of child nodes, the leading text
    and the public member names available on the node.

    xmlstr: the XML document as a string.
    Returns None; all output goes to stdout.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is always the root element; firstChild could be a
    # comment, doctype or processing instruction that precedes it.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; whitespace text, comments and
        # other node kinds are shown above and otherwise skipped.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %d' % len(child.childNodes))
        # An element may be empty or may start with a nested element, so
        # only read .data when the first child really carries text.
        first = child.firstChild
        if first is not None and first.nodeType in (first.TEXT_NODE,
                                                    first.CDATA_SECTION_NODE):
            data = first.data
        else:
            data = ''
        print('child data: %s' % data)
        print('=======================================')
        print('more help info to see:')
        # List the public member names; pass any of them (as an attribute
        # of the node) to help() for details. Calling help() on the bare
        # name string only looks up a help topic with that name.
        print(', '.join(member for member in dir(child)
                        if not member.startswith('_')))
  
              
# When run as a script, walk the sample document held in ``xmlstr``.
if __name__ == '__main__':  
    doxml(xmlstr)

结果:

Dom:
<?xml version="1.0" ?><hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>
root:
<hash>
    <request name="first">/2/photos/square/type.xml</request>
    <error_code>21301</error_code>
    <error>auth faild!</error>
</hash>

    
<request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:

两种方法各有其优点,python的xml处理模块太多,目前只用到这2个。

================================================补充的分割线======================================================

实际工作中发现python的minidom无法解析其它编码的xml,只能解析utf-8的编码,而且xml文件头部的编码声明也必须是utf-8,声明为其它编码会报错。网上的解决办法都是先替换xml文件头部的编码声明,再把内容转换为utf-8,然后用minidom解析,实际测试可行,不过有点累赘的感觉。

================================================写xml内容的分割线======================================================

#!/usr/bin/env python
#encoding: utf-8
from xml.dom import minidom


class xmlwrite(object):
    """Build a small XML result report with minidom and save it as UTF-8.

    The document root is always ``<api>``. Nodes are added with
    ``write_node``, which locates the parent through a simple path such as
    ``/Case[1]/``: steps are separated by ``/``, are resolved starting from
    the children of the root element, and may carry a zero-based ``[index]``
    that selects among same-named siblings (no index means the first one).
    Attribute predicates are not supported.
    """

    def __init__(self, resultfile):
        """Create an empty ``<api>`` document.

        resultfile: path that ``save_xml`` writes the document to.
        """
        self.resultfile = resultfile
        self.rootname = 'api'
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the DOM document and remember its root element."""
        impl = minidom.getDOMImplementation()
        self.dom = impl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    @staticmethod
    def _to_text(value):
        """Return *value* unchanged if it is text, else its ``str()`` form.

        minidom only accepts string node contents, so numbers such as
        counters have to be converted before they are stored.
        """
        text_types = (str, type(u''))
        if isinstance(value, text_types):
            return value
        return str(value)

    def __get_spec_node(self, xpath):
        """Resolve *xpath* to an element below the root.

        Returns the matching node, the root itself for an empty path such
        as ``/``, or None when any step of the path does not exist.
        Raises ValueError when an ``[index]`` is malformed.
        """
        current = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                # Leading/trailing/double slashes produce empty steps.
                continue
            bracket = step.find('[')
            if bracket > -1:
                if not step.endswith(']'):
                    raise ValueError('malformed index in path step %r' % step)
                tagname = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: keep the full name and take the first match.
                tagname = step
                index = 0
            matches = [child for child in current.childNodes
                       if child.nodeName == tagname]
            if index < 0 or index >= len(matches):
                return None
            current = matches[index]
        return current

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Append a new element below the node addressed by *parent*.

        parent: path of the parent node (see the class docstring).
        nodename: tag name of the new element.
        value: element content; None or '' creates an empty element, any
            other non-string value (0 included) is stored as ``str(value)``.
        attribute: optional dict of attributes to set on the element.
        CDATA: store the content as a CDATA section instead of plain text.

        When the parent cannot be resolved a notice is printed and the
        element is appended to the root instead.
        """
        node = self.dom.createElement(nodename)
        if value is not None and value != '':
            text = self._to_text(value)
            if CDATA:
                content = self.dom.createCDATASection(text)
            else:
                content = self.dom.createTextNode(text)
            node.appendChild(content)
        # Attributes apply to empty elements too, so this is independent
        # of whether a value was given.
        if isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, self._to_text(attr_value))
        try:
            parentnode = self.__get_spec_node(parent)
        except ValueError:
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append ``<StartTime>`` to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append ``<EndTime>`` to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append ``<PassCount>`` to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append ``<FailCount>`` to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``<Case>`` to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append ``<No>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append ``<URL>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append ``<DBData>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append ``<APIData>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append ``<DBSQL>`` as a CDATA section to the ``index``-th ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append ``<APIXPath>`` to the ``index``-th (zero-based) ``<Case>``."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialise the document to ``self.resultfile`` as UTF-8 bytes.

        The document is encoded by minidom itself and written in binary
        mode, so the bytes on disk always match the ``encoding="utf-8"``
        declaration regardless of the platform's default encoding.
        """
        payload = self.dom.toxml(encoding='utf-8')
        with open(self.resultfile, 'wb') as out:
            out.write(payload)
        
if __name__ == '__main__':
    # Demo: build a report with two cases and save it to disk.
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()

    # One row per case: (index, no, url, sql, dbdata, xpath, apidata).
    demo_cases = (
        (0, '0', 'http://www.google.com', 'select * from ',
         'dbtata', '/xpath', 'apidata'),
        (1, '1', 'http://www.baidu.com', 'select 1 from ',
         'dbtata1', '/xpath1', 'apidata1'),
    )
    for idx, case_no, url, sql, dbdata, xpath, apidata in demo_cases:
        xr.write_case_no(idx, case_no)
        # The URL is written twice, so each Case gets two URL children.
        xr.write_case_url(idx, url)
        xr.write_case_url(idx, url)
        xr.write_case_dbsql(idx, sql)
        xr.write_case_dbdata(idx, dbdata)
        xr.write_case_apixpath(idx, xpath)
        xr.write_case_apidata(idx, apidata)
    xr.save_xml()
    

封装了minidom,支持通过类似xpath的路径来写节点,不支持带属性的匹配,但支持带索引的匹配。路径从根节点的子节点开始写,不包含根节点本身,索引从0开始。如:/child[1],表示根节点下的第2个child节点

抱歉!评论已关闭.