飞云小侠的个人博客

欢迎你到这里来

« Webwork in action 中文版面世了 | Main | Freemarker + Tiles/SiteMesh »

截取部分Html的Java类
2006/11/19,19:36

在使用普通字符串时,使用substring就可以进行截取部分字符串 (当然还要考虑多国语言的问题)

但是对于HTML字符串来说,如果采用同样的方法,可能会把标签从中间截断或者丢掉结束标签,破坏HTML结构,造成页面错乱。经过对HtmlParser的研究,我写了一个类,可以在保持标签完整的前提下对HTML字符串进行截取。

可以自己根据实际情况改进,如果考虑多国语言,也要修改字符串长度的计算方法. 总之要灵活使用,随机应变,而不是照搬照抄.

下面贴出此类的内容,使用了开源项目Html Parser. 

 


  1. import org.apache.commons.logging.Log;
  2. import org.apache.commons.logging.LogFactory;
  3. import org.htmlparser.*;
  4. import org.htmlparser.tags.CompositeTag;
  5. import org.htmlparser.util.NodeIterator;
  6. import org.htmlparser.util.NodeList;
  7.  
  8. /**
  9. * Functions for HTML.
  10. *
  11. * @author Scud http://www.javascud.org Date: Nov 3, 2006 10:22:20 AM
  12. */
  13. public class HtmlSubstring
  14. {
  15. private static Log log = LogFactory.getLog(HtmlSubstring.class);
  16.  
  17. /**
  18. * get parser for substring.
  19. * @return Parser
  20. */
  21. public static Parser getMyParser()
  22. {
  23. Parser parser = new Parser();
  24.  
  25. PrototypicalNodeFactory factory = new PrototypicalNodeFactory();
  26.  
  27. //register tags which htmlParser not have
  28. factory.registerTag(new StrongTag());
  29. factory.registerTag(new BoldTag());
  30. factory.registerTag(new ItalicTag());
  31. factory.registerTag(new UnderlineTag());
  32. factory.registerTag(new CenterTag());
  33. factory.registerTag(new FontTag());
  34.  
  35. parser.setNodeFactory(factory);
  36.  
  37. return parser;
  38. }
  39.  
  40. /**
  41. * Substring for Html String.
  42. *
  43. * @param htmlString Html string
  44. * @param maxlength maxlength
  45. * @return String
  46. */
  47. public static String substring(String htmlString, int maxlength)
  48. {
  49. StringBuffer htmlOut = new StringBuffer();
  50. StringBuffer stringOut = new StringBuffer();
  51.  
  52. try
  53. {
  54. Parser parser = getMyParser();
  55.  
  56. parser.setInputHTML(htmlString);
  57.  
  58. NodeIterator nit = parser.elements();
  59.  
  60. boolean breaked = false;
  61.  
  62. while (nit.hasMoreNodes())
  63. {
  64. Node node = nit.nextNode();
  65. if (node instanceof Text)
  66. if (node instanceof Text)
  67. {
  68. breaked = dealText(node, stringOut, htmlOut, maxlength);
  69. }
  70. else if (node instanceof Tag)
  71. {
  72. Tag tag = (Tag) node;
  73. breaked = dealTag(tag, stringOut, htmlOut, maxlength);
  74. }
  75. else if (node instanceof Remark)
  76. {
  77. //nothing to do
  78. }
  79. if (breaked)
  80. {
  81. break;
  82. }
  83. }
  84. }
  85. catch (Exception e)
  86. {
  87. log.error("Error occured when parse Html String", e);
  88. }
  89.  
  90. return htmlOut.toString();
  91. }
  92.  
  93. private static boolean dealText(Node node, StringBuffer stringOut, StringBuffer htmlOut, int maxlength)
  94. {
  95. String currentText = node.getText();
  96. int previousLength = stringOut.length();
  97.  
  98. if (previousLength + currentText.length() >= maxlength)
  99. {
  100. String cutString = currentText.substring(0, maxlength - previousLength);
  101.  
  102. stringOut.append(cutString);
  103. htmlOut.append(cutString);
  104.  
  105. log.debug(cutString);
  106.  
  107. return true;
  108. }
  109. else
  110. {
  111. stringOut.append(node.getText());
  112. htmlOut.append(node.getText());
  113. log.debug(node.getText());
  114. }
  115.  
  116. return false;
  117. }
  118.  
  119. private static boolean dealTag(Tag aTag, StringBuffer stringOut, StringBuffer htmlOut, int maxlength) throws Exception
  120. {
  121. NodeList list = aTag.getChildren();
  122.  
  123. log.debug(getStartTagString(aTag));
  124.  
  125. htmlOut.append(getStartTagString(aTag));
  126.  
  127. boolean breaked = false;
  128.  
  129. if (list != null)
  130. {
  131. NodeIterator it = list.elements();
  132.  
  133. while (it.hasMoreNodes())
  134. {
  135. Node node = it.nextNode();
  136. if (node instanceof Text)
  137. {
  138. breaked = dealText(node, stringOut, htmlOut, maxlength);
  139. }
  140. else if (node instanceof Tag)
  141. {
  142. Tag tag = (Tag) node;
  143. breaked = dealTag(tag, stringOut, htmlOut, maxlength);
  144. }
  145. else if (node instanceof Remark)
  146. {
  147. //nothing to do
  148. }
  149.  
  150. if (breaked)
  151. {
  152. break;
  153. }
  154. }
  155. }
  156.  
  157. Tag endTag = aTag.getEndTag();
  158. if (endTag != null)
  159. {
  160. htmlOut.append(aTag.getEndTag().toHtml());
  161. log.debug(aTag.getEndTag().toHtml());
  162. }
  163.  
  164. return breaked;
  165. }
  166.  
  167. private static String getStartTagString(Tag aTag)
  168. {
  169. StringBuffer start = new StringBuffer("<");
  170.  
  171. for (Object o : aTag.getAttributesEx())
  172. {
  173. Attribute ab = (Attribute) o;
  174. start.append(ab.toString());
  175. }
  176.  
  177. start.append(">");
  178. return start.toString();
  179. }
  180.  
  181.  
  182. }
  183.  
  184. class StrongTag extends CompositeTag
  185. {
  186. private static final String[] mIds = new String[]{"STRONG"};
  187.  
  188. public StrongTag()
  189. {
  190. }
  191.  
  192. public String[] getIds()
  193. {
  194. return (mIds);
  195. }
  196.  
  197. public String[] getEnders()
  198. {
  199. return (mIds);
  200. }
  201.  
  202. public String[] getEndTagEnders()
  203. {
  204. return (new String[0]);
  205. }
  206. }
  207.  
  208. class BoldTag extends CompositeTag
  209. {
  210. private static final String[] mIds = new String[]{"B"};
  211.  
  212. public BoldTag()
  213. {
  214. }
  215.  
  216. public String[] getIds()
  217. {
  218. return (mIds);
  219. }
  220.  
  221. public String[] getEnders()
  222. {
  223. return (mIds);
  224. }
  225.  
  226. public String[] getEndTagEnders()
  227. {
  228. return (new String[0]);
  229. }
  230. }
  231.  
  232. class ItalicTag extends CompositeTag
  233. {
  234. private static final String[] mIds = new String[]{"I"};
  235.  
  236. public ItalicTag()
  237. {
  238. }
  239.  
  240. public String[] getIds()
  241. {
  242. return (mIds);
  243. }
  244.  
  245. public String[] getEnders()
  246. {
  247. return (mIds);
  248. }
  249.  
  250. public String[] getEndTagEnders()
  251. {
  252. return (new String[0]);
  253. }
  254. }
  255.  
  256. class UnderlineTag extends CompositeTag
  257. {
  258. private static final String[] mIds = new String[]{"U"};
  259.  
  260. public UnderlineTag()
  261. {
  262. }
  263.  
  264. public String[] getIds()
  265. {
  266. return (mIds);
  267. }
  268.  
  269. public UnderlineTag()
  270. {
  271. }
  272.  
  273. public String[] getIds()
  274. {
  275. return (mIds);
  276. }
  277.  
  278. public String[] getEnders()
  279. {
  280. return (mIds);
  281. }
  282.  
  283. public String[] getEndTagEnders()
  284. {
  285. return (new String[0]);
  286. }
  287. }
  288.  
  289. class CenterTag extends CompositeTag
  290. {
  291. private static final String[] mIds = new String[]{"CENTER"};
  292.  
  293. public CenterTag()
  294. {
  295. }
  296.  
  297. public String[] getIds()
  298. {
  299. return (mIds);
  300. }
  301.  
  302. public String[] getEnders()
  303. {
  304. return (mIds);
  305. }
  306.  
  307. public String[] getEndTagEnders()
  308. {
  309. return (new String[0]);
  310. }
  311. }
  312.  
  313. class FontTag extends CompositeTag
  314. {
  315. private static final String[] mIds = new String[]{"FONT"};
  316.  
  317. public FontTag()
  318. {
  319. }
  320.  
  321. public String[] getIds()
  322. {
  323. return (mIds);
  324. }
  325.  
  326. public String[] getEnders()
  327. {
  328. return (mIds);
  329. }
  330.  
  331. public String[] getEndTagEnders()
  332. {
  333. return (new String[0]);
  334. }
  335. }

 

最新回复

不能截取啊。。。

Comment Icon Posted by: CAnca at 2007/08/18, 03:45

路过看看~~~~~~~

Comment Icon Posted by: emu32 at 2007/05/07, 16:37

新手向你学习!

Comment Icon Posted by: sm at 2007/03/02, 12:58

新手向你学习!185939643QQ

Comment Icon Posted by: JM at 2007/01/03, 17:54

please give me a example

Comment Icon Posted by: scud at 2006/11/29, 20:02

你真行!
用Java里的正则表达去匹配、截取你要的部分,岂不简单得多?

Comment Icon Posted by: 匿名 at 2006/11/29, 19:51
 
Support by JavaScud