/// <summary>
/// DOM query helper that selects HtmlAgilityPack nodes using a small, jQuery-like selector syntax.
/// </summary>
/// <remarks>
/// A selector is a space-separated list of steps applied from left to right. Every step searches
/// the descendants of the nodes matched by the previous step. Supported step forms:
/// <list type="bullet">
/// <item><description><c>#id</c> - lookup by element id; only recognised as the FIRST step.</description></item>
/// <item><description><c>.name</c> - nodes whose <c>class</c> attribute contains <c>name</c> (case-insensitive).</description></item>
/// <item><description><c>tag</c> - nodes whose original tag name equals <c>tag</c> (case-insensitive).</description></item>
/// <item><description><c>step:fun</c> / <c>step:fun(arg)</c> - a class or tag step followed by a filter function:
/// <c>eq</c>, <c>lt</c>, <c>gt</c>, <c>first</c>, <c>last</c>, <c>even</c>, <c>odd</c>, <c>next</c>,
/// <c>contains</c>, <c>empty</c>, <c>header</c>. Positions are zero-based and are evaluated per parent node.</description></item>
/// </list>
/// </remarks>
public class DomQuery
{
    /// <summary>
    /// Splits one selector step into tokens: runs of '.', '|', '-' and word characters.
    /// Token 0 is the class/tag part, token 1 the function name, token 2 the function argument.
    /// </summary>
    private static readonly Regex TokenRegex = new Regex(@"([.|\-|\w]+)", RegexOptions.Singleline);

    /// <summary>Whitespace characters that may separate names inside a <c>class</c> attribute.</summary>
    private static readonly char[] ClassSeparators = new char[] { ' ', '\t', '\r', '\n', '\f' };

    /// <summary>Tag names matched by the <c>header</c> function.</summary>
    private static readonly string[] HeaderTags = new string[] { "h1", "h2", "h3", "h4", "h5", "h6" };

    /// <summary>
    /// Returns every node of the document that matches the selector.
    /// </summary>
    /// <param name="_HtmlDocument">Document to search.</param>
    /// <param name="selector">Space-separated selector steps (see the class remarks).</param>
    /// <returns>
    /// The matched nodes in document order; an empty list when nothing matches or when the
    /// selector contains no steps.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="_HtmlDocument"/> or <paramref name="selector"/> is null.
    /// </exception>
    /// <exception cref="NotSupportedException">A step names an unknown filter function.</exception>
    public IList<HtmlNode> Get(HtmlDocument _HtmlDocument, string selector)
    {
        if (_HtmlDocument == null)
        {
            throw new ArgumentNullException("_HtmlDocument");
        }
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        string[] steps = selector.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
        List<HtmlNode> matches = new List<HtmlNode>();

        // An empty or blank selector has no steps; return an empty result instead of indexing steps[0].
        if (steps.Length == 0)
        {
            return matches;
        }

        int firstStep;
        if (steps[0].StartsWith("#", StringComparison.Ordinal))
        {
            // An id step seeds the search with that single element (if it exists).
            HtmlNode byId = _HtmlDocument.GetElementbyId(steps[0].TrimStart('#'));
            if (byId != null)
            {
                matches.Add(byId);
            }
            firstStep = 1;
        }
        else
        {
            // Otherwise the search is seeded with the element children of the document node;
            // the first step then looks at THEIR descendants.
            matches.AddRange(_HtmlDocument.DocumentNode.ChildNodes.Where(x => x.NodeType == HtmlNodeType.Element));
            firstStep = 0;
        }

        for (int i = firstStep; i < steps.Length; i++)
        {
            matches = Get(matches, steps[i]);
        }

        return matches;
    }

    /// <summary>
    /// Finds the first node matching the selector and returns its InnerHtml.
    /// </summary>
    /// <param name="_HtmlDocument">Document to search.</param>
    /// <param name="selector">Selector, see <see cref="Get(HtmlDocument, string)"/>.</param>
    /// <returns>The InnerHtml of the first match, or null when nothing matches.</returns>
    public string SingleGetInnerHtml(HtmlDocument _HtmlDocument, string selector)
    {
        HtmlNode hn = SingleGet(_HtmlDocument, selector);
        if (hn == null)
        {
            return null;
        }
        return hn.InnerHtml;
    }

    /// <summary>
    /// Finds the first node matching the selector and returns its trimmed InnerText.
    /// </summary>
    /// <param name="_HtmlDocument">Document to search.</param>
    /// <param name="selector">Selector, see <see cref="Get(HtmlDocument, string)"/>.</param>
    /// <returns>The InnerText of the first match with surrounding whitespace removed, or null when nothing matches.</returns>
    public string SingleGetInnerText(HtmlDocument _HtmlDocument, string selector)
    {
        HtmlNode hn = SingleGet(_HtmlDocument, selector);
        if (hn == null)
        {
            return null;
        }
        return hn.InnerText.Trim();
    }

    /// <summary>
    /// Finds the first node matching the selector.
    /// </summary>
    /// <param name="_HtmlDocument">Document to search.</param>
    /// <param name="selector">Selector, see <see cref="Get(HtmlDocument, string)"/>.</param>
    /// <returns>The first matched node, or null when nothing matches.</returns>
    public HtmlNode SingleGet(HtmlDocument _HtmlDocument, string selector)
    {
        IList<HtmlNode> hnList = Get(_HtmlDocument, selector);
        if (hnList.Count == 0)
        {
            return null;
        }
        return hnList[0];
    }

    #region Attribute access

    /// <summary>
    /// Collects the value of one attribute from every node that carries it.
    /// </summary>
    /// <param name="_HtmlNodes">Nodes to inspect; null or empty yields an empty array.</param>
    /// <param name="attr">Attribute name.</param>
    /// <returns>The attribute values in node order; nodes without the attribute (and null entries) are skipped.</returns>
    public string[] Attr(IList<HtmlNode> _HtmlNodes, string attr)
    {
        if (_HtmlNodes == null || _HtmlNodes.Count == 0)
        {
            return new string[0];
        }

        List<string> values = new List<string>(_HtmlNodes.Count);
        foreach (var node in _HtmlNodes)
        {
            if (node == null)
            {
                continue;
            }

            // Single lookup per node instead of "filter, then look up again".
            var attribute = node.Attributes[attr];
            if (attribute != null)
            {
                values.Add(attribute.Value);
            }
        }
        return values.ToArray();
    }

    #endregion

    #region Selector step evaluation

    /// <summary>
    /// Applies one selector step to a set of context nodes.
    /// </summary>
    /// <param name="_HtmlNodes">Context nodes whose descendants are searched.</param>
    /// <param name="Expression">A single step such as <c>div</c>, <c>.item</c> or <c>li:eq(2)</c>.</param>
    /// <returns>The nodes matched by the step.</returns>
    /// <exception cref="NotSupportedException">The step names an unknown filter function.</exception>
    private List<HtmlNode> Get(List<HtmlNode> _HtmlNodes, string Expression)
    {
        string step = null;
        string fun = null;
        int index = -1;
        string keyword = null;

        MatchCollection tokens = TokenRegex.Matches(Expression);
        if (tokens.Count > 0)
        {
            step = tokens[0].Value;
        }
        if (tokens.Count > 1)
        {
            fun = tokens[1].Value;
        }
        if (tokens.Count > 2)
        {
            // Always keep the raw argument so that "contains(123)" receives "123" rather than null.
            keyword = tokens[2].Value;

            // NOTE: when the argument is not an integer, TryParse leaves index at 0 (not -1);
            // this matches the historical behaviour of positional functions with a bad argument.
            int.TryParse(keyword, out index);
        }

        if (string.IsNullOrEmpty(fun))
        {
            // Plain step: the whole expression is the class or tag name.
            if (Expression.StartsWith(".", StringComparison.Ordinal))
            {
                return Class(_HtmlNodes, Expression).ToList();
            }
            return NodeType(_HtmlNodes, Expression).ToList();
        }

        // Step with a function: the function is evaluated separately for each context node,
        // so positions (eq/lt/gt/first/...) are relative to that node's own matches.
        bool byClass = step.StartsWith(".", StringComparison.Ordinal);
        List<HtmlNode> list = new List<HtmlNode>();
        foreach (var n in _HtmlNodes)
        {
            IEnumerable<HtmlNode> candidates;
            if (byClass)
            {
                candidates = Class(n, step);
            }
            else
            {
                candidates = NodeType(n, step);
            }
            list.AddRange(FunAction(candidates, fun, index, keyword));
        }
        return list;
    }

    #region Filter functions

    /// <summary>
    /// Applies a filter function to an ordered sequence of candidate nodes.
    /// </summary>
    /// <param name="v">Candidate nodes, in document order.</param>
    /// <param name="fun">Function name (case-insensitive).</param>
    /// <param name="index">Zero-based position used by <c>eq</c>, <c>lt</c> and <c>gt</c>.</param>
    /// <param name="keyword">Text used by <c>contains</c>; must not be null for that function.</param>
    /// <returns>The filtered nodes.</returns>
    /// <exception cref="NotSupportedException"><paramref name="fun"/> is not a known function.</exception>
    private IEnumerable<HtmlNode> FunAction(IEnumerable<HtmlNode> v, string fun, int index, string keyword)
    {
        // Invariant lower-casing: culture-sensitive casing would break e.g. "FIRST" under tr-TR.
        switch (fun.ToLowerInvariant())
        {
            case "eq":
                return v.Where((nn, position) => position == index);
            case "lt":
                return v.Where((nn, position) => position < index);
            case "gt":
                return v.Where((nn, position) => position > index);
            case "first":
                // At most one element, obtained in a single enumeration.
                return v.Take(1);
            case "last":
                {
                    // Candidates are never null, so null here means "sequence was empty".
                    HtmlNode last = v.LastOrDefault();
                    if (last == null)
                    {
                        return Enumerable.Empty<HtmlNode>();
                    }
                    return new HtmlNode[] { last };
                }
            case "even":
                return v.Where((nn, position) => position % 2 == 0);
            case "odd":
                return v.Where((nn, position) => (position & 1) == 1);
            case "next":
                // A node without a following sibling yields null; drop those so later steps
                // never receive null nodes.
                return v.Select(nn => nn.NextSibling).Where(sibling => sibling != null);
            case "contains":
                return v.Where(x => x.InnerHtml.Contains(keyword));
            case "empty":
                return v.Where(x => x.HasChildNodes == false);
            case "header":
                // Matches h1..h6 among the DESCENDANTS of the candidates; case-insensitive,
                // consistent with tag matching in NodeType.
                return FindChildNodes(v.ToList())
                    .Where(x => HeaderTags.Contains(x.OriginalName, StringComparer.OrdinalIgnoreCase));
            default:
                throw new NotSupportedException("函数不支持。");
        }
    }

    #endregion

    #endregion

    #region Lookup by class name

    /// <summary>
    /// Finds the descendants of a single node that carry the given class.
    /// </summary>
    /// <param name="hn">Context node.</param>
    /// <param name="Expression">Class name, with or without the leading dot.</param>
    /// <returns>Matching descendants in document order.</returns>
    private ParallelQuery<HtmlNode> Class(HtmlNode hn, string Expression)
    {
        return Class(new HtmlNode[] { hn }, Expression);
    }

    /// <summary>
    /// Finds the descendants of the given nodes that carry the given class.
    /// </summary>
    /// <param name="_HtmlNodes">Context nodes.</param>
    /// <param name="Expression">Class name, with or without the leading dot.</param>
    /// <returns>Matching descendants in document order.</returns>
    private ParallelQuery<HtmlNode> Class(IList<HtmlNode> _HtmlNodes, string Expression)
    {
        // Hoisted: the class name is the same for every node.
        string className = Expression.TrimStart('.');

        // AsOrdered keeps document order; without it PLINQ may return matches in any order,
        // which breaks positional functions and "first match" lookups.
        return FindChildNodes(_HtmlNodes)
            .AsParallel()
            .AsOrdered()
            .Where(x => HasClass(x, className));
    }

    /// <summary>
    /// Tells whether the node's <c>class</c> attribute lists the given class name.
    /// </summary>
    /// <param name="node">Node to test.</param>
    /// <param name="className">Class name without the leading dot.</param>
    /// <returns>True when one of the whitespace-separated class names equals <paramref name="className"/>, ignoring case.</returns>
    private static bool HasClass(HtmlNode node, string className)
    {
        var attribute = node.Attributes["class"];
        if (attribute == null)
        {
            return false;
        }

        string value = attribute.Value;
        if (string.IsNullOrEmpty(value))
        {
            return false;
        }

        return value
            .Split(ClassSeparators, StringSplitOptions.RemoveEmptyEntries)
            .Contains(className, StringComparer.OrdinalIgnoreCase);
    }

    #endregion

    #region Lookup by tag name

    /// <summary>
    /// Finds the descendants of a single node that have the given tag name.
    /// </summary>
    /// <param name="hn">Context node.</param>
    /// <param name="Expression">Tag name.</param>
    /// <returns>Matching descendants in document order.</returns>
    private ParallelQuery<HtmlNode> NodeType(HtmlNode hn, string Expression)
    {
        return NodeType(new HtmlNode[] { hn }, Expression);
    }

    /// <summary>
    /// Finds the descendants of the given nodes that have the given tag name.
    /// </summary>
    /// <param name="_HtmlNodes">Context nodes.</param>
    /// <param name="Expression">Tag name, compared case-insensitively with the node's original name.</param>
    /// <returns>Matching descendants in document order.</returns>
    private ParallelQuery<HtmlNode> NodeType(IList<HtmlNode> _HtmlNodes, string Expression)
    {
        // AsOrdered keeps document order (see Class); ordinal comparison avoids culture surprises.
        return FindChildNodes(_HtmlNodes)
            .AsParallel()
            .AsOrdered()
            .Where(x => string.Equals(x.OriginalName, Expression, StringComparison.OrdinalIgnoreCase));
    }

    #endregion

    #region Descendant enumeration

    /// <summary>
    /// Collects all descendants of the given nodes in depth-first (document) order.
    /// </summary>
    /// <param name="_HtmlNodes">Root nodes; the roots themselves are not included in the result.</param>
    /// <returns>
    /// The descendants. When one root is a descendant of another, the shared nodes appear more than once.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="_HtmlNodes"/> is null.</exception>
    private List<HtmlNode> FindChildNodes(IList<HtmlNode> _HtmlNodes)
    {
        if (_HtmlNodes == null)
        {
            throw new ArgumentNullException("_HtmlNodes");
        }

        List<HtmlNode> list = new List<HtmlNode>();
        foreach (var root in _HtmlNodes)
        {
            if (root != null)
            {
                FindChildNodesAction(root, list);
            }
        }
        return list;
    }

    /// <summary>
    /// Appends the descendants of <paramref name="hn"/> to <paramref name="list"/>, depth first.
    /// </summary>
    /// <param name="hn">Node whose children are visited; only element nodes are expanded.</param>
    /// <param name="list">Accumulator receiving the visited nodes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="list"/> is null.</exception>
    private void FindChildNodesAction(HtmlNode hn, List<HtmlNode> list)
    {
        if (list == null)
        {
            throw new ArgumentNullException("list");
        }

        // The element check concerns the parent, so it is evaluated once instead of per child.
        // Children of any node type (including text/comment nodes) are still added.
        if (hn.NodeType != HtmlNodeType.Element)
        {
            return;
        }

        foreach (var child in hn.ChildNodes)
        {
            list.Add(child);
            FindChildNodesAction(child, list);
        }
    }

    #endregion
}