C++ html解析库htmlcxx自封装的工具类（搜索标签）

#include <clocale>
#include <iostream>
#include <string>
#include "htmlcxx/include/ParserDom.h"
#include "HtmlCxxUtils.h"

using namespace std;
using namespace htmlcxx;

/**
 * Demo driver for the two HtmlCxxUtils::selectTag overloads.
 *
 * Parses a small fixed HTML snippet with htmlcxx and then looks up:
 *   1. the first and second <span> tags (search by tag name + index), and
 *   2. <div> tags carrying a given class attribute value
 *      (search by tag name + attribute name/value + index).
 *
 * Each lookup result is validated on its own before it is advanced or
 * dereferenced, and the node's text() is written to stdout.
 */
int main() {
	// HTML text to parse. The double quotes that belong to the HTML attributes
	// are escaped so that they do not terminate the C++ string literal.
	// NOTE(review): the snippet ends with "<div>" rather than "</div>";
	// kept as-is since it may be intentional test input — confirm.
	string htmlStr =
		"<div class=\"parent\">"
		"<div class=\"children_1\"><span>I am the first span!</span></div>"
		"<div class=\"children_2\"><span>I am the second span!</span></div>"
		"</div>"
		"<div class=\"parent\">123456789<div>";
	// Set the locale before parsing to avoid errors when parsing Chinese text.
	setlocale(LC_ALL, ".OCP");
	HTML::ParserDom parser;
	tree<HTML::Node> dom = parser.parseTree(htmlStr);
	// Iterators delimiting the whole DOM tree; they bound every search below.
	// NOTE(review): selectTag takes both iterators by non-const reference
	// (see the quoted signatures); verify it does not move `it` between calls.
	tree<HTML::Node>::iterator it = dom.begin();
	tree<HTML::Node>::iterator end = dom.end();

	// Exercise: static tree<HTML::Node>::iterator selectTag(tree<HTML::Node>::iterator& beginIt, tree<HTML::Node>::iterator& endIt, string tagName, int index = 1);

	// Find the first span in the HTML (index omitted, so the default of 1 applies).
	// Presumably selectTag yields a null iterator when nothing matches, hence
	// the comparison against NULL — confirm against HtmlCxxUtils.
	tree<HTML::Node>::iterator firstSpanIt = HtmlCxxUtils::selectTag(it, end, "span");
	if (firstSpanIt != NULL) {
		// The search stops on the span tag itself; step one position forward
		// to reach the content inside the span.
		++firstSpanIt;
		cout << "第一个span标签内部的text：" + firstSpanIt->text() << endl;
	}

	// Find the second span in the HTML (index = 2).
	tree<HTML::Node>::iterator secondSpanIt = HtmlCxxUtils::selectTag(it, end, "span", 2);
	// Check the iterator that is actually used below (the original tested
	// firstSpanIt here, leaving secondSpanIt unchecked).
	if (secondSpanIt != NULL) {
		// Step from the span tag to the content inside it.
		++secondSpanIt;
		cout << "第二个span标签内部的text：" + secondSpanIt->text() << endl;
	}

	// Exercise: static tree<HTML::Node>::iterator selectTag(tree<HTML::Node>::iterator& beginIt, tree<HTML::Node>::iterator& endIt, string tagName, string attrName, string attrValue, int index = 1);

	// Find the first div (default index) whose class attribute equals "children_1".
	tree<HTML::Node>::iterator firstDivIt = HtmlCxxUtils::selectTag(it, end, "div", "class", "children_1");
	if (firstDivIt != NULL) {
		// Prints the text() of the matched div node itself (no step forward).
		cout << "第一个含有属性class=\"children_1\"的div：" + firstDivIt->text() << endl;
	}

	// Find the second div (index = 2) whose class attribute equals "parent".
	tree<HTML::Node>::iterator secondDivIt = HtmlCxxUtils::selectTag(it, end, "div", "class", "parent", 2);
	// Check the iterator that is actually used below (the original tested
	// firstDivIt here, leaving secondDivIt unchecked).
	if (secondDivIt != NULL) {
		// Step from the div tag to the node that follows it in the tree.
		++secondDivIt;
		cout << "第二个含有属性class=\"parent\"的div：" + secondDivIt->text() << endl;
	}
}

htmlcxx