使用 XPath 解析 HTML
// Fetches a page from www.xiyou.edu.cn, passes the bytes through -toUTF8:,
// and queries the result with Hpple (TFHpple) for every <a> element.
// NOTE(review): this snippet appears to have lost text during extraction;
// several statements below are only the tail of a message send. The gaps are
// flagged line by line and must be restored from the original source.

// Address of the page to scrape.
NSString *urlString =nil;
urlString =@"http://www.xiyou.edu.cn/new/lm.jsp?urltype=tree.TreeTempUrl&wbtreeid=724";
// Loads the page contents from the URL into memory.
// NOTE(review): `NSURLURLWithString` looks like `NSURL URLWithString` with the
// separating space lost - confirm.
NSData *htmlData = [[NSData alloc] initWithContentsOfURL:[NSURLURLWithString:urlString]];
// Runs the downloaded bytes through -toUTF8: before parsing.
NSData *toHtmlData =[self
toUTF8:htmlData];
// Builds the parser over the converted HTML data.
TFHpple *xpathParser =[[TFHpple
alloc] initWithHTMLData:toHtmlData];
// Collects the nodes matched by the XPath query for all anchor elements.
NSArray*aArray = [xpathParser
searchWithXPathQuery:@"//a"];
// NOTE(review): `span` is not declared anywhere in the visible code; the
// array produced just above is `aArray` - confirm which one was intended.
if ([span count] >
0) {
// NOTE(review): the start of this loop header is missing. What remains shows
// an index `i` running from 87 up to (not including) 102, both hard-coded.
87; i < 102; i++) {
// NOTE(review): the next five lines are tails of message sends (children,
// objectAtIndex:0, content); their receivers and the variables they were
// assigned to are not visible.
children];
objectAtIndex:0];
children];
objectAtIndex:0]
content];
// Logs the extracted text and attribute dictionary.
// NOTE(review): the declarations of `aStr` and `aAttributeDict` are not visible.
NSLog(@"aStr:%@",aStr);
NSLog(@"aAttributeDict:%@",aAttributeDict);
// Prefixes the site host to the value stored under the href key.
// NOTE(review): the receiver/assignment in front of stringWithFormat: is
// missing, and `aAttributeDictobjectForKey` looks like
// `aAttributeDict objectForKey` with the space lost - confirm.
stringWithFormat:@"http://www.xiyou.edu.cn%@",[aAttributeDictobjectForKey:@"href"]];
// NOTE(review): the collections receiving these addObject: calls are not visible.
addObject:aStr];
addObject:hrefStr];
// Manual retain/release: balances the two alloc/init calls above.
[htmlData release];
[xpathParser release];
// NOTE(review): two braces are opened above (the `if` and the loop) but only
// this one closing brace is visible - confirm the intended nesting.
}
// If the page being parsed is not UTF-8 encoded (for example GBK), it can be
// converted to UTF-8 first and then parsed.
//
// Decodes sourceData as GB 18030-2000 text and re-encodes it as UTF-8.
// @param sourceData Raw page bytes, treated as GB 18030-2000 encoded.
// @return Presumably the UTF-8 encoded data; no return statement is visible
//         in this snippet - TODO confirm.
// NOTE(review): this method appears to have lost text during extraction; the
// gaps are flagged below.
-(NSData *)toUTF8:(NSData *)sourceData {
// Builds a CoreFoundation string from the raw bytes using the GB 18030-2000
// encoding.
// NOTE(review): the declaration/type of `gbkStr` is missing, and no CFRelease
// for the created string is visible - confirm ownership handling.
gbkStr =CFStringCreateWithBytes(NULL,[sourceData bytes],[sourceData length],kCFStringEncodingGB_18030_2000,false);
// NOTE(review): only the tail of a comparison against NULL remains; the start
// of the condition and the body of this branch are not visible.
NULL) {
// Encodes `gbkString` as UTF-8 data.
// NOTE(review): the declared type in front of `*utf8_String` and the origin of
// `gbkString` (as opposed to `gbkStr`) are not visible.
*utf8_String =[gbkString
dataUsingEncoding:NSUTF8StringEncoding];
// NOTE(review): a brace is opened on both the method line and the NULL check,
// but only this one closing brace is visible.
}