现在的位置: 首页 > 综合 > 正文

[Linux C]利用libxml2解析xml文件

2013年12月10日 ⁄ 综合 ⁄ 共 6610字 ⁄ 字号 评论关闭

为了解析xml,可以使用Linux下默认安装的libxml2。

/*
	a.c
	功能:利用libxml2解析xml文件
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libgen.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <libxml/xpath.h>

/*
 * Get the full path of the currently running executable.
 *
 * lpOut receives the NUL-terminated path. Up to BUFSIZ bytes (including the
 * terminator) are copied into it, so the caller must supply a buffer at
 * least that large. On failure the string "." is stored instead.
 *
 * Returns 0 on success, -1 if the /proc/self/exe link could not be read.
 */
int GetCurFilePath(char *lpOut)
{
	char chPath[BUFSIZ] = {0};
	/* readlink() never appends a terminating NUL, so keep the last byte
	   of the buffer free and terminate the result explicitly. */
	ssize_t nLen = readlink("/proc/self/exe", chPath, sizeof(chPath) - 1);
	if(nLen < 0)
	{
		strcpy(lpOut, ".");
		return -1;
	}

	chPath[nLen] = '\0';
	strcpy(lpOut, chPath);
	return 0;
}

/*
 * Get the directory that contains the currently running executable.
 *
 * lpOut receives the NUL-terminated directory path; it is left untouched
 * when the executable path cannot be determined.
 *
 * Returns 0 on success, -1 on failure.
 */
int GetCurDir(char *lpOut)
{
	char	chPath[BUFSIZ] = { 0 };
	if( GetCurFilePath(chPath) < 0 )
		return - 1;

	/* dirname() may either edit chPath in place or hand back a pointer to
	   its own storage, so only its return value can be relied upon. */
	strcpy(lpOut, dirname(chPath));

	return 0;
}

/*
 * Parse the XML file named docname and return its document pointer.
 * Prints a message to stderr and returns NULL when parsing fails.
 */
xmlDocPtr getdoc(char *docname)
{
	xmlDocPtr parsed = xmlParseFile(docname);

	if(parsed != NULL)
		return parsed;

	fprintf(stderr, "Document not parsed successfully.\n");
	return NULL;
}

// Evaluate the XPath expression xpath against document doc.
// Returns the result object when it holds a non-empty node set; the caller
// releases it with xmlXPathFreeObject(). Returns NULL when the context cannot
// be created, the expression cannot be evaluated, or the node set is empty.
xmlXPathObjectPtr getnodeset(xmlDocPtr doc, xmlChar *xpath)
{
	xmlXPathContextPtr context;
	xmlXPathObjectPtr result;
	context = xmlXPathNewContext(doc);
	if(context == NULL)
	{
		// Report failures on stderr, as getdoc() does.
		fprintf(stderr, "Error in xmlXPathNewContext\n");
		return NULL;
	}
	result = xmlXPathEvalExpression(xpath, context);		// evaluate xpath within context
	xmlXPathFreeContext(context);							// the context is no longer needed
	if(result == NULL)
	{
		fprintf(stderr, "Error in xmlXPathEvalExpression\n");
		return NULL;
	}
	if(xmlXPathNodeSetIsEmpty(result->nodesetval))			// the expression matched nothing
	{
		xmlXPathFreeObject(result);
		printf("No result\n");
		return NULL;
	}
	return result;
}

// 解析xmlPath路径的结点
void testReadXmlDoc(char *filepath, char *xmlPath)
{
	xmlDocPtr doc = getdoc(filepath);
	if(NULL == doc)
		return ;

	xmlChar *xpath = (xmlChar*) xmlPath;
	xmlXPathObjectPtr result = getnodeset(doc, xpath);			// 获取结果集
	if(result)
	{
		xmlNodeSetPtr nodeset = result->nodesetval;
		xmlChar *name, *value;
		printf("nodeset->nodeNr = %d\n", nodeset->nodeNr);		// 打印结果集中结点个数
		for(int i = 0; i < nodeset->nodeNr; i++)
		{
			xmlNodePtr cur = nodeset->nodeTab[i];				// products
			printf("cur->name = %s\n", cur->name);
			cur = cur->xmlChildrenNode;
			while(cur)
			{
				if(xmlStrcmp(cur->name, (const xmlChar*) "text"))		// cur->name不为"text"
				{
					printf("cur->name = %s\t", cur->name);
					name = xmlGetProp(cur, (const xmlChar*) "name");	// 获取属性值
					value = xmlGetProp(cur, (const xmlChar*) "value");
					printf("name = %s, value = %s\n", name, value);
					xmlFree(name);
					xmlFree(value);
				}
				cur = cur->next;
			}
			printf("\n");
		}
		xmlXPathFreeObject(result);
	}
	xmlFreeDoc(doc);
	xmlCleanupParser();
}

/*
 * Entry point: locate dprod.xml next to the executable and print the
 * children of every /allproducts/products node.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the path to dprod.xml does not
 * fit in the buffer.
 */
int main(void)
{
	/* GetCurDir() may store up to BUFSIZ bytes, so the buffers are sized to
	   match instead of using a fixed 100 bytes. */
	char curDir[BUFSIZ] = {0};
	char docname[BUFSIZ + 16] = {0};

	/* If the executable's directory is unknown, look in the current
	   working directory instead of at the filesystem root. */
	if(GetCurDir(curDir) < 0)
		strcpy(curDir, ".");

	int nLen = snprintf(docname, sizeof(docname), "%s/dprod.xml", curDir);
	if(nLen < 0 || (size_t) nLen >= sizeof(docname))
	{
		fprintf(stderr, "Path to dprod.xml is too long.\n");
		return EXIT_FAILURE;
	}

	testReadXmlDoc(docname, "/allproducts/products");

	return EXIT_SUCCESS;
}

makefile文件:

# Build the libxml2 demo program $(BIN) from $(BIN).c.
CC=gcc
CFLAGS=
BIN=a
# Directory that holds the libxml2 headers (<libxml/...>).
INC=/usr/include/libxml2

# "clean" names an action, not a file; without .PHONY a file called
# "clean" in this directory would stop the rule from running.
.PHONY: clean

$(BIN): $(BIN).c
	$(CC) $(CFLAGS) -o $(BIN) $(BIN).c -I$(INC) -lxml2 -std=c99

clean:
	rm -f *.o $(BIN)

xml文件(dprod.xml)内容:

<?xml version="1.0"?>
<allproducts>
  <products>
	<product name="name11" value="value11" />
	<product name="name12" value="value12" />
	<product name="name13" value="value13" />
	<product name="name14" value="value14" />
  </products>
  <products>
	<product name="name21" value="value21" />
	<product name="name22" value="value22" />
	<product name="name23" value="value23" />
  </products>
  <products>
	<product name="name31" value="value31" />
	<product name="name32" value="value32" />
  </products>
</allproducts>

编译运行:

[zcm@tmp #115]$make
gcc  -o a a.c -I/usr/include/libxml2 -lxml2 -std=c99
a.c: 在函数‘GetCurFilePath’中:
a.c:18: 警告:隐式声明函数‘readlink’
[zcm@tmp #116]$./a
nodeset->nodeNr = 3
cur->name = products
cur->name = product	name = name11, value = value11
cur->name = product	name = name12, value = value12
cur->name = product	name = name13, value = value13
cur->name = product	name = name14, value = value14

cur->name = products
cur->name = product	name = name21, value = value21
cur->name = product	name = name22, value = value22
cur->name = product	name = name23, value = value23

cur->name = products
cur->name = product	name = name31, value = value31
cur->name = product	name = name32, value = value32

[zcm@tmp #117]$

说明:对于编译中出现的“a.c:18: 警告:隐式声明函数‘readlink’”这个警告,起初实在不能明白。我查了下手册,这个函数声明在unistd.h中,而且我也已经#include了它,为什么还会出现这个警告呢?

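后来突然想到,可能是-std=c99的原因:-std=c99是严格的ISO C模式,此时glibc的头文件默认不会暴露readlink这类POSIX函数的声明。将它改为-std=gnu99(或者保留-std=c99,并在包含任何头文件之前定义_POSIX_C_SOURCE为200809L)后,这个警告就没有了!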

--------------------------------------------------------------------------------------------------------------------------------------------------

修改了xml文件和上面源码中的testReadXmlDoc()后,发现结果相当神奇,看来对libxml2的理解还是比较缺乏。

1. 修改xml文件内容:

<?xml version="1.0"?>
<allproducts>
  <products>h1
	<product name="name11" value="value11" />h2
	<product name="name12" value="value12" />h3
	<product name="name13" value="value13" />h4
	<product name="name14" value="value14" />h5
  </products>
  <products>
	<product name="name21" value="value21" />
	<product name="name22" value="value22" />
	<product name="name23" value="value23" />
  </products>
  <products>
	<product name="name31" value="value31" />g1
	<product name="name32" value="value32" />
g2</products>
</allproducts>

2. 修改testReadXmlDoc()

// 解析xmlPath路径的结点
void testReadXmlDoc(char *filepath, char *xmlPath)
{
	xmlDocPtr doc = getdoc(filepath);
	if(NULL == doc)
		return ;

	xmlChar *xpath = (xmlChar*) xmlPath;
	xmlXPathObjectPtr result = getnodeset(doc, xpath);			// 获取结果集
	if(result)
	{
		xmlNodeSetPtr nodeset = result->nodesetval;
		xmlChar *name, *value;
		printf("nodeset->nodeNr = %d\n", nodeset->nodeNr);		// 打印结果集中结点个数
		for(int i = 0; i < nodeset->nodeNr; i++)
		{
			xmlNodePtr cur = nodeset->nodeTab[i];				// products
			printf("cur->name = %s\n", cur->name);
			cur = cur->xmlChildrenNode;
			int ctext = 0;
			while(cur)
			{
				if(xmlStrcmp(cur->name, (const xmlChar*) "text"))		// cur->name不为"text"
				{
					printf("cur->name = %s\t", cur->name);
					name = xmlGetProp(cur, (const xmlChar*) "name");	// 获取属性值
					value = xmlGetProp(cur, (const xmlChar*) "value");
					printf("name = %s, value = %s\n", name, value);
					xmlFree(name);
					xmlFree(value);
				}
				else
				{
					ctext++;
					xmlChar *v = xmlNodeListGetString(doc, cur, 1);
					printf("cur->content = [%s], v = [%s]", cur->content, v);	// cur->content获取cur的内容
					xmlFree(v);
				}
				cur = cur->next;
			}
			printf("ctext = %d\n", ctext);
			printf("\n");
		}
		xmlXPathFreeObject(result);
	}
	xmlFreeDoc(doc);
	xmlCleanupParser();
}

运行结果:

[zcm@tmp #168]$make
gcc  -o a a.c -I/usr/include/libxml2 -lxml2 -std=gnu99
[zcm@tmp #169]$./a
nodeset->nodeNr = 3
cur->name = products
cur->content = [h1
	], v = [h1
	h2
	h3
	h4
	h5
  ]cur->name = product	name = name11, value = value11
cur->content = [h2
	], v = [h2
	h3
	h4
	h5
  ]cur->name = product	name = name12, value = value12
cur->content = [h3
	], v = [h3
	h4
	h5
  ]cur->name = product	name = name13, value = value13
cur->content = [h4
	], v = [h4
	h5
  ]cur->name = product	name = name14, value = value14
cur->content = [h5
  ], v = [h5
  ]ctext = 5

cur->name = products
cur->content = [
	], v = [
	
	
	
  ]cur->name = product	name = name21, value = value21
cur->content = [
	], v = [
	
	
  ]cur->name = product	name = name22, value = value22
cur->content = [
	], v = [
	
  ]cur->name = product	name = name23, value = value23
cur->content = [
  ], v = [
  ]ctext = 4

cur->name = products
cur->content = [
	], v = [
	g1
	
g2]cur->name = product	name = name31, value = value31
cur->content = [g1
	], v = [g1
	
g2]cur->name = product	name = name32, value = value32
cur->content = [
g2], v = [
g2]ctext = 3

[zcm@tmp #170]$

由此可见,cur->content只返回当前文本结点自身的内容,而xmlNodeListGetString(doc, cur, 1)会把从cur开始、一直到兄弟链表末尾的所有文本结点的内容拼接在一起。因此一般情况下,我们用的比较多的可能会是cur->content这个东西了!

补充:

在网上看到一个人的写法,可以在解析xml文件时,直接忽略掉结点之间的无效空白。对于本文,就是将:

doc = xmlParseFile(docname); --->修改为:doc = xmlReadFile(docname, "UTF-8",
XML_PARSE_NOBLANKS);
  // 注意函数要换成xmlReadFile(xmlParseFile只接受文件名这一个参数);第3个参数是关键

抱歉!评论已关闭.