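libxml2 在内存中一律使用 UTF-8 编码保存文档内容,与 XML 文件本身声明的编码无关。因此在处理 GB2312 编码的 XML 文件时,传给 libxml2 的字符串(例如新增的节点内容、属性值)必须先转换为 UTF-8;从 libxml2 读出的内容同样是 UTF-8,显示前需要转换回 GB2312;保存时再指定输出编码为 GB2312 即可。
编码转换使用 iconv,以下是转换代码: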
/*
 * Convert the NUL-terminated string `in` from encoding `encFrom` to
 * encoding `encTo` using iconv.
 *
 * The terminating NUL of `in` is converted together with the text, so the
 * result is terminated in the target encoding.
 *
 * Returns a pointer to an internal static 1024-byte buffer holding the
 * converted string, or NULL if any argument is NULL, the conversion pair is
 * not supported, the input contains an invalid sequence, or the result does
 * not fit in the buffer.
 *
 * The returned buffer is overwritten by the next call, so the function is
 * not reentrant; copy the result if it must outlive the next conversion.
 */
char * ConvertEnc( char *encFrom, char *encTo, const char * in)
{
    static char bufout[1024];
    char *src;
    char *dst = bufout;
    size_t in_left;
    size_t out_left = sizeof bufout;
    size_t rc;
    iconv_t cd;

    if (encFrom == NULL || encTo == NULL || in == NULL)
    {
        return NULL;
    }

    cd = iconv_open(encTo, encFrom);
    if (cd == (iconv_t)-1)
    {
        printf("iconv_open false: %s ==> %s\n", encFrom, encTo);
        return NULL;
    }

    /* iconv() takes a non-const input pointer but does not modify the data. */
    src = (char *)in;
    /* +1 so the NUL terminator is converted as well. */
    in_left = strlen(in) + 1;

    /*
     * The byte counts must be real size_t objects: passing the address of an
     * int cast to size_t * makes iconv read and write past the int on
     * platforms where size_t is wider.
     */
    rc = iconv(cd, &src, &in_left, &dst, &out_left);

    /* Release the descriptor on every path, including conversion failure. */
    iconv_close(cd);

    if (rc == (size_t)-1)
    {
        return NULL;
    }
    return bufout;
}
以下是实例
test.xml
<?xml version='1.0' encoding='gb2312'?>
<parent>测试</parent>
读取代码
int main(void)
{
xmlDocPtr doc = NULL;
xmlNodePtr cur = NULL;
doc = xmlParseFile('test.xml');
cur = xmlDocGetRootElement(doc);
printf('%s\n', Convert('utf-8', 'gb2312', (char *)xmlNodeGetContent(cur)));
}
修改、保存代码
test2.xml
<?xml version='1.0' encoding='gb2312'?>
<story>
<storyinfo>
<author>John Fleck</author>
<datewritten>June 2, 2002</datewritten>
<keyword>我来也 example keyword</keyword>
<书目> C++ </书目>
<测试> test </测试>
</storyinfo>
<body>
<headline>This is the headline</headline>
<para>This is the body text.</para>
</body>
</story>
/*
 * Parse the XML file `docname`, verify that its root element is <story>,
 * and append an empty <reference> child to the root carrying a "uri"
 * attribute whose value is `uri`.
 *
 * `uri` is handed to libxml2 as an xmlChar string, so it must already be
 * UTF-8 encoded.
 *
 * Returns the modified document (the caller releases it with xmlFreeDoc),
 * or NULL if the file cannot be parsed, is empty, has a different root
 * element, or the new node/attribute cannot be created.
 */
xmlDocPtr
parseDoc(char *docname, char *uri) {
    xmlDocPtr doc;
    xmlNodePtr cur;
    xmlNodePtr newnode;

    doc = xmlParseFile(docname);
    if (doc == NULL) {
        fprintf(stderr, "Document not parsed successfully. \n");
        return (NULL);
    }

    cur = xmlDocGetRootElement(doc);
    if (cur == NULL) {
        fprintf(stderr, "empty document\n");
        xmlFreeDoc(doc);
        return (NULL);
    }

    /* xmlStrcmp returns 0 on equality, so non-zero means a different root. */
    if (xmlStrcmp(cur->name, (const xmlChar *) "story")) {
        fprintf(stderr, "document of the wrong type, root node != story");
        xmlFreeDoc(doc);
        return (NULL);
    }

    newnode = xmlNewTextChild(cur, NULL, (const xmlChar *) "reference", NULL);
    if (newnode == NULL) {
        fprintf(stderr, "failed to create reference node\n");
        xmlFreeDoc(doc);
        return (NULL);
    }

    if (xmlNewProp(newnode, (const xmlChar *) "uri", (const xmlChar *) uri) == NULL) {
        fprintf(stderr, "failed to set uri attribute\n");
        xmlFreeDoc(doc);
        return (NULL);
    }

    return (doc);
}
/*
 * Modify/save example.
 *
 * Usage: xmlout <output-encoding> <xml-file>
 *
 * Converts the literal "测试" from GB2312 to UTF-8, passes it to parseDoc()
 * as the value of the new "uri" attribute, then writes the resulting
 * document to standard output ("-") in the encoding named by argv[1].
 *
 * Returns 0 on success and 1 on a usage error or any processing failure.
 */
int main(int argc, char ** argv )
{
    const char *target = "-";   /* "-" makes libxml2 write to stdout */
    xmlDocPtr doc = NULL;
    char * szNode = NULL;
    int ret = 0;

    if (argc < 3)
    {
        fprintf(stderr, "usage: %s <encoding> <xml-file>\n",
                argc > 0 ? argv[0] : "xmlout");
        return 1;
    }

    xmlAddEncodingAlias("UTF-8", "DVEnc");
    xmlKeepBlanksDefault(0);
    /*
     * NOTE(review): defaultEntityLoader and xmllintExternalEntityLoader are
     * not defined in this snippet; they look like they were taken from
     * xmllint.c -- confirm they are available or drop these two lines.
     */
    defaultEntityLoader = xmlGetExternalEntityLoader();
    xmlSetExternalEntityLoader(xmllintExternalEntityLoader);
    xmlLineNumbersDefault(1);

    /*
     * The literal is interpreted as GB2312 bytes, which assumes this source
     * file itself is saved in GB2312 -- TODO confirm.
     */
    szNode = ConvertEnc("gb2312", "utf-8", "测试" );
    if (szNode == NULL)
    {
        fprintf(stderr, "failed to convert attribute value to utf-8\n");
        return 1;
    }

    doc = parseDoc( argv[2], szNode);
    if (doc == NULL)
    {
        /* parseDoc has already reported the reason on stderr. */
        return 1;
    }

    ret = xmlSaveFormatFileEnc(target, doc, argv[1], 1);
    xmlFreeDoc(doc);
    if (ret < 0)
    {
        fprintf(stderr, "failed save to %s\n", target);
        return 1;
    }
    return 0;
}
执行
xmlout gb2312 test2.xml
结果
<?xml version='1.0' encoding='gb2312'?>
<story>
<storyinfo>
<author>John Fleck</author>
<datewritten>June 2, 2002</datewritten>
<keyword>我来也 example keyword</keyword>
<书目> C++ </书目>
<测试> test </测试>
</storyinfo>
<body>
<headline>This is the headline</headline>
<para>This is the body text.</para>
</body>
<reference uri='测试'/>
</story>