c#获取html文本的第一张图片与截取内容摘要示例（c#获取html文本的第一张图片与截取内容摘要示例）

获取第一张图片

要我们获得到的数据是一段HTML文本、也许这段文本里面有许多图片、需要截取一张作为标题图片、也就是做为主图、这时就可以用到下面这个方法获取到第一张图片。

示例代码

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 #region 获取第一张图片 /// <summary> /// 获取HTML文本的图片地址 /// </summary> /// <param name="content"></param> /// <returns></returns>/ /// public ArrayList getimgurl(string html) { ArrayList resultStr = new ArrayList(); Regex r = new Regex(@"<IMG[^>] src=s*(?:´(?<src>[^´] )´|""(?<src>[^""] )""|(?<src>[^>s] ))s*[^>]*>", RegexOptions.IgnoreCase);//忽视大小写 MatchCollection mc = r.Matches(html); foreach (Match m in mc) { resultStr.Add(m.Groups["src"].Value.ToLower()); } if (resultStr.Count > 0) { return resultStr; } else { resultStr.Clear(); return resultStr; } } #endregion

注意：上面所返回的是一个ArrayList 集合、包含了文本里面所有的Img的src、这样我们就可以访问到img的src了

截取HTML文本

有时候我们得到的数据是一段HTML文本、需要截取HTML文本的一部分作为内容摘要、此时、我们可以运用下面这个方法

示例代码

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 #region 新闻内容摘要 /// <summary> /// 新闻内容摘要 /// </summary> /// <param name="sString"></param> /// <param name="nLeng"></param> /// <returns></returns> public string GetContentSummary(string content, int length, bool StripHTML) { if (string.IsNullOrEmpty(content) || length == 0) return ""; if (StripHTML) { Regex re = new Regex("<[^>]*>"); content = re.Replace(content, ""); content = content.Replace("　", "").Replace(" ", ""); if (content.Length <= length) return content; else return content.Substring(0, length) "……"; } else { if (content.Length <= length) return content; int pos = 0, npos = 0, size = 0; bool firststop = false, notr = false, noli = false; StringBuilder sb = new StringBuilder(); while (true) { if (pos >= content.Length) break; string cur = content.Substring(pos, 1); if (cur == "<") { string next = content.Substring(pos 1, 3).ToLower(); if (next.IndexOf("p") == 0 && next.IndexOf("pre") != 0) { npos = content.IndexOf(">", pos) 1; } else if (next.IndexOf("/p") == 0 && next.IndexOf("/pr") != 0) { npos = content.IndexOf(">", pos) 1; if (size < length) sb.Append("<br/>"); } else if (next.IndexOf("br") == 0) { npos = content.IndexOf(">", pos) 1; if (size < length) sb.Append("<br/>"); } else if (next.IndexOf("img") == 0) { npos = content.IndexOf(">", pos) 1; if (size < length) { sb.Append(content.Substring(pos, npos - pos)); size = npos - pos 1; } } else if (next.IndexOf("li") == 0 || next.IndexOf("/li") == 0) { npos = content.IndexOf(">", pos) 1; if (size < length) { sb.Append(content.Substring(pos, npos - pos)); } else { if (!noli && next.IndexOf("/li") == 0) { sb.Append(content.Substring(pos, npos - pos)); noli = true; } } } else if (next.IndexOf("tr") == 0 || next.IndexOf("/tr") == 0) { npos = content.IndexOf(">", pos) 1; if (size < length) { sb.Append(content.Substring(pos, npos - pos)); } else { if (!notr && next.IndexOf("/tr") == 0) { sb.Append(content.Substring(pos, npos - pos)); notr = true; } } } else if (next.IndexOf("td") == 0 || next.IndexOf("/td") == 0) { npos = content.IndexOf(">", pos) 1; if (size < length) { sb.Append(content.Substring(pos, npos - pos)); } else { if (!notr) { sb.Append(content.Substring(pos, npos - pos)); } } } else { npos = content.IndexOf(">", pos) 1; sb.Append(content.Substring(pos, npos - pos)); } if (npos <= pos) npos = pos 1; pos = npos; } else { if (size < length) { sb.Append(cur); size ; } else { if (!firststop) { sb.Append("……"); firststop = true; } } pos ; } } return sb.ToString(); } } #endregion