为了解析xml,可以使用Linux下默认安装的libxml2。
/* a.c 功能:利用libxml2解析xml文件 */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <libgen.h> #include <libxml/xmlmemory.h> #include <libxml/parser.h> #include <libxml/xpath.h> int GetCurFilePath(char *lpOut) // get full path of the executable file { char chPath[BUFSIZ] = {0}; int nRetVal = readlink("/proc/self/exe", chPath, sizeof(chPath)); // get full path of the current-executable file if(nRetVal < 0) { strcpy(lpOut, "."); return -1; } else { strcpy(lpOut, chPath); return 0; } } int GetCurDir(char *lpOut) // get directory-path of current executable-file { char chPath[BUFSIZ] = { 0 }; if( GetCurFilePath(chPath) < 0 ) return - 1; dirname(chPath); // dirname will change value of "chPath"(contain result) strcpy(lpOut, chPath); // copy result to out-param return 0; } xmlDocPtr getdoc(char *docname) // 根据文件名得到文档指针 { xmlDocPtr doc; doc = xmlParseFile(docname); if(doc == NULL) { fprintf(stderr, "Document not parsed successfully.\n"); return NULL; } return doc; } // 在文档doc中解析xpath表达式,返回结果集指针 xmlXPathObjectPtr getnodeset(xmlDocPtr doc, xmlChar *xpath) { xmlXPathContextPtr context; xmlXPathObjectPtr result; context = xmlXPathNewContext(doc); if(context == NULL) { printf("Error in xmlXPathNewContent\n"); return NULL; } result = xmlXPathEvalExpression(xpath, context); // 在context中解析表达式xpath xmlXPathFreeContext(context); // 释放context if(result == NULL) { printf("Error in xmlXPathEvalExpression\n"); return NULL; } if(xmlXPathNodeSetIsEmpty(result->nodesetval)) // 解析表达式的结果集为空 { xmlXPathFreeObject(result); printf("No result\n"); return NULL; } return result; } // 解析xmlPath路径的结点 void testReadXmlDoc(char *filepath, char *xmlPath) { xmlDocPtr doc = getdoc(filepath); if(NULL == doc) return ; xmlChar *xpath = (xmlChar*) xmlPath; xmlXPathObjectPtr result = getnodeset(doc, xpath); // 获取结果集 if(result) { xmlNodeSetPtr nodeset = result->nodesetval; xmlChar *name, *value; printf("nodeset->nodeNr = %d\n", nodeset->nodeNr); // 打印结果集中结点个数 for(int i = 0; i < 
nodeset->nodeNr; i++) { xmlNodePtr cur = nodeset->nodeTab[i]; // products printf("cur->name = %s\n", cur->name); cur = cur->xmlChildrenNode; while(cur) { if(xmlStrcmp(cur->name, (const xmlChar*) "text")) // cur->name不为"text" { printf("cur->name = %s\t", cur->name); name = xmlGetProp(cur, (const xmlChar*) "name"); // 获取属性值 value = xmlGetProp(cur, (const xmlChar*) "value"); printf("name = %s, value = %s\n", name, value); xmlFree(name); xmlFree(value); } cur = cur->next; } printf("\n"); } xmlXPathFreeObject(result); } xmlFreeDoc(doc); xmlCleanupParser(); } int main(void) { char curDir[100] = {0}; char docname[100] = {0}; GetCurDir(curDir); strcpy(docname, curDir); strcat(docname, "/dprod.xml"); testReadXmlDoc(docname, "/allproducts/products"); return EXIT_SUCCESS; }
makefile文件:
# Build a.c against libxml2.
CC=gcc
CFLAGS=
BIN=a
# Directory that holds the libxml2 headers.
INC=/usr/include/libxml2

# Compile and link the program in a single step.
$(BIN): $(BIN).c
	$(CC) $(CFLAGS) -o $(BIN) $(BIN).c -I$(INC) -lxml2 -std=c99

# Remove build products.
clean:
	rm -f *.o $(BIN)
xml文件(dprod.xml)内容:
<?xml version="1.0"?> <allproducts> <products> <product name="name11" value="value11" /> <product name="name12" value="value12" /> <product name="name13" value="value13" /> <product name="name14" value="value14" /> </products> <products> <product name="name21" value="value21" /> <product name="name22" value="value22" /> <product name="name23" value="value23" /> </products> <products> <product name="name31" value="value31" /> <product name="name32" value="value32" /> </products> </allproducts>
编译运行:
[zcm@tmp #115]$make gcc -o a a.c -I/usr/include/libxml2 -lxml2 -std=c99 a.c: 在函数‘GetCurFilePath’中: a.c:18: 警告:隐式声明函数‘readlink’ [zcm@tmp #116]$./a nodeset->nodeNr = 3 cur->name = products cur->name = product name = name11, value = value11 cur->name = product name = name12, value = value12 cur->name = product name = name13, value = value13 cur->name = product name = name14, value = value14 cur->name = products cur->name = product name = name21, value = value21 cur->name = product name = name22, value = value22 cur->name = product name = name23, value = value23 cur->name = products cur->name = product name = name31, value = value31 cur->name = product name = name32, value = value32 [zcm@tmp #117]$
说明:对于编译中出现的“a.c:18: 警告:隐式声明函数‘readlink’”这个警告,一开始实在不能明白。我查了下手册,这个函数在unistd.h中声明,而且我也已经#include了,为什么还会出现这个警告呢?
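后来突然想到,可能是-std=c99的原因:-std=c99是严格的ISO C模式,glibc的头文件在该模式下默认不声明readlink这类POSIX函数。将它改为-std=gnu99后,这个警告就没有了!(也可以保留-std=c99,在所有#include之前加上 #define _POSIX_C_SOURCE 200112L。)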
--------------------------------------------------------------------------------------------------------------------------------------------------
修改了xml文件和上面源码中的testReadXmlDoc()后,发现结果相当神奇,看来对libxml2的理解还是比较缺乏。
1. 修改xml文件内容:
<?xml version="1.0"?> <allproducts> <products>h1 <product name="name11" value="value11" />h2 <product name="name12" value="value12" />h3 <product name="name13" value="value13" />h4 <product name="name14" value="value14" />h5 </products> <products> <product name="name21" value="value21" /> <product name="name22" value="value22" /> <product name="name23" value="value23" /> </products> <products> <product name="name31" value="value31" />g1 <product name="name32" value="value32" /> g2</products> </allproducts>
2. 修改testReadXmlDoc()
// 解析xmlPath路径的结点 void testReadXmlDoc(char *filepath, char *xmlPath) { xmlDocPtr doc = getdoc(filepath); if(NULL == doc) return ; xmlChar *xpath = (xmlChar*) xmlPath; xmlXPathObjectPtr result = getnodeset(doc, xpath); // 获取结果集 if(result) { xmlNodeSetPtr nodeset = result->nodesetval; xmlChar *name, *value; printf("nodeset->nodeNr = %d\n", nodeset->nodeNr); // 打印结果集中结点个数 for(int i = 0; i < nodeset->nodeNr; i++) { xmlNodePtr cur = nodeset->nodeTab[i]; // products printf("cur->name = %s\n", cur->name); cur = cur->xmlChildrenNode; int ctext = 0; while(cur) { if(xmlStrcmp(cur->name, (const xmlChar*) "text")) // cur->name不为"text" { printf("cur->name = %s\t", cur->name); name = xmlGetProp(cur, (const xmlChar*) "name"); // 获取属性值 value = xmlGetProp(cur, (const xmlChar*) "value"); printf("name = %s, value = %s\n", name, value); xmlFree(name); xmlFree(value); } else { ctext++; xmlChar *v = xmlNodeListGetString(doc, cur, 1); printf("cur->content = [%s], v = [%s]", cur->content, v); // cur->content获取cur的内容 xmlFree(v); } cur = cur->next; } printf("ctext = %d\n", ctext); printf("\n"); } xmlXPathFreeObject(result); } xmlFreeDoc(doc); xmlCleanupParser(); }
运行结果:
[zcm@tmp #168]$make gcc -o a a.c -I/usr/include/libxml2 -lxml2 -std=gnu99 [zcm@tmp #169]$./a nodeset->nodeNr = 3 cur->name = products cur->content = [h1 ], v = [h1 h2 h3 h4 h5 ]cur->name = product name = name11, value = value11 cur->content = [h2 ], v = [h2 h3 h4 h5 ]cur->name = product name = name12, value = value12 cur->content = [h3 ], v = [h3 h4 h5 ]cur->name = product name = name13, value = value13 cur->content = [h4 ], v = [h4 h5 ]cur->name = product name = name14, value = value14 cur->content = [h5 ], v = [h5 ]ctext = 5 cur->name = products cur->content = [ ], v = [ ]cur->name = product name = name21, value = value21 cur->content = [ ], v = [ ]cur->name = product name = name22, value = value22 cur->content = [ ], v = [ ]cur->name = product name = name23, value = value23 cur->content = [ ], v = [ ]ctext = 4 cur->name = products cur->content = [ ], v = [ g1 g2]cur->name = product name = name31, value = value31 cur->content = [g1 ], v = [g1 g2]cur->name = product name = name32, value = value32 cur->content = [ g2], v = [ g2]ctext = 3 [zcm@tmp #170]$
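由此可见,cur->content只是当前文本结点自身的内容;而xmlNodeListGetString(doc, cur, 1)会从cur开始,沿着兄弟链表把后面所有文本结点的内容依次拼接起来(中间的元素结点被跳过),所以第一个文本结点才会得到"h1 h2 h3 h4 h5"这样的结果。一般情况下,如果只想取单个文本结点的内容,我们用的比较多的可能会是cur->content这个东西了!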
补充:
在网上看到一个人的写法,可以在解析xml文件时,直接忽略掉结点之间的无效空白。对于本文,就是将:
doc = xmlParseFile(docname); --->修改为:doc = xmlReadFile(docname, "UTF-8", XML_PARSE_NOBLANKS); // 第3个参数是关键
(注意:xmlParseFile只接受文件名一个参数,可以同时指定编码和解析选项的接口是xmlReadFile。)