Clover Coverage Report - Pebble 2.5-SNAPSHOT
Coverage timestamp: Sat Jun 12 2010 09:39:29 EST
../../../../img/srcFileCovDistChart10.png 0% of files have more coverage
381   580   34   34,64
36   464   0,09   11
11     3,09  
1    
This report was generated with an evaluation server license. Purchase Clover or configure your license.
 
  StringUtils       Line # 43 381 0% 34 3 99,3% 0.9929907
 
  (370)
 
1    /*
2    * Copyright (c) 2003-2006, Simon Brown
3    * All rights reserved.
4    *
5    * Redistribution and use in source and binary forms, with or without
6    * modification, are permitted provided that the following conditions are met:
7    *
8    * - Redistributions of source code must retain the above copyright
9    * notice, this list of conditions and the following disclaimer.
10    *
11    * - Redistributions in binary form must reproduce the above copyright
12    * notice, this list of conditions and the following disclaimer in
13    * the documentation and/or other materials provided with the
14    * distribution.
15    *
16    * - Neither the name of Pebble nor the names of its contributors may
17    * be used to endorse or promote products derived from this software
18    * without specific prior written permission.
19    *
20    * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21    * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22    * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23    * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24    * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25    * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26    * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27    * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28    * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29    * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30    * POSSIBILITY OF SUCH DAMAGE.
31    */
32    package net.sourceforge.pebble.util;
33   
34    import java.util.*;
35    import java.util.regex.Matcher;
36    import java.util.regex.Pattern;
37   
/**
 * A collection of utility methods for manipulating strings, mostly for
 * escaping, filtering and selectively re-enabling HTML in user supplied text.
 *
 * <p>All methods are static and keep no mutable state after class
 * initialisation.</p>
 *
 * @author Simon Brown
 */
public final class StringUtils {

  /*
   * transformToHTMLSubset() works on text that has already been escaped by
   * transformHTML() (it looks for href=&quot; and un-escapes &amp;lt;), so
   * every tag pattern below matches the ESCAPED form of the tag.
   */
  private static final Pattern OPENING_B_TAG_PATTERN = Pattern.compile("&lt;b&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_B_TAG_PATTERN = Pattern.compile("&lt;/b&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_STRONG_TAG_PATTERN = Pattern.compile("&lt;strong&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_STRONG_TAG_PATTERN = Pattern.compile("&lt;/strong&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_I_TAG_PATTERN = Pattern.compile("&lt;i&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_I_TAG_PATTERN = Pattern.compile("&lt;/i&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_EM_TAG_PATTERN = Pattern.compile("&lt;em&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_EM_TAG_PATTERN = Pattern.compile("&lt;/em&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_BLOCKQUOTE_TAG_PATTERN = Pattern.compile("&lt;blockquote&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_BLOCKQUOTE_TAG_PATTERN = Pattern.compile("&lt;/blockquote&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern BR_TAG_PATTERN = Pattern.compile("&lt;br */*&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_P_TAG_PATTERN = Pattern.compile("&lt;p&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_P_TAG_PATTERN = Pattern.compile("&lt;/p&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_PRE_TAG_PATTERN = Pattern.compile("&lt;pre&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_PRE_TAG_PATTERN = Pattern.compile("&lt;/pre&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_UL_TAG_PATTERN = Pattern.compile("&lt;ul&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_UL_TAG_PATTERN = Pattern.compile("&lt;/ul&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_OL_TAG_PATTERN = Pattern.compile("&lt;ol&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_OL_TAG_PATTERN = Pattern.compile("&lt;/ol&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_LI_TAG_PATTERN = Pattern.compile("&lt;li&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_LI_TAG_PATTERN = Pattern.compile("&lt;/li&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_A_TAG_PATTERN = Pattern.compile("&lt;/a&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_A_TAG_PATTERN = Pattern.compile("&lt;a href=.*?&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_SUP_TAG_PATTERN = Pattern.compile("&lt;sup&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_SUP_TAG_PATTERN = Pattern.compile("&lt;/sup&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern OPENING_SUB_TAG_PATTERN = Pattern.compile("&lt;sub&gt;", Pattern.CASE_INSENSITIVE);
  private static final Pattern CLOSING_SUB_TAG_PATTERN = Pattern.compile("&lt;/sub&gt;", Pattern.CASE_INSENSITIVE);

  /** A double-escaped entity such as {@code &amp;copy;}; group 1 is the entity name. */
  private static final Pattern ESCAPED_ENTITY_PATTERN = Pattern.compile("&amp;([#a-zA-Z0-9]{1,}?);");

  /** Anything shaped like a named entity; used to look candidates up in the entity table. */
  private static final Pattern NAMED_ENTITY_PATTERN = Pattern.compile("&[A-Za-z0-9]+;");

  /** An HTML comment, possibly spanning several lines. */
  private static final Pattern HTML_COMMENT_PATTERN = Pattern.compile("(?s)<!--.*?-->");

  /** Any HTML tag, possibly spanning several lines. */
  private static final Pattern HTML_TAG_PATTERN = Pattern.compile("(?s)<.*?>");

  /** A single whitespace character; the word separator used by truncate(). */
  private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

  /** A script element with its content, in any letter case and across line breaks. */
  private static final Pattern SCRIPT_ELEMENT_PATTERN =
      Pattern.compile("<script.*?>.*?</script.*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

  /** A self-closing script tag, in any letter case. */
  private static final Pattern SELF_CLOSING_SCRIPT_PATTERN =
      Pattern.compile("<script.*?/>", Pattern.CASE_INSENSITIVE);

  private static final String DOUBLE_QUOTED_HREF = "href=&quot;";
  private static final String ESCAPED_DOUBLE_QUOTE = "&quot;";
  private static final String SINGLE_QUOTED_HREF = "href='";

  public static final int MAX_CONTENT_LENGTH = 255;
  public static final int MAX_WORD_LENGTH = 20;
  public static final int MAX_NUM_OF_POSTS = 5;

  /** URL prefixes accepted by validateUrl(); anything else is rejected. */
  private static final String[] ALLOWED_SCHEMES = {"https://", "http://", "ftp://", "mailto:"};

  //HTML4 248 named entities
  private static final Map<String, String> HTML_ENTITIES = new HashMap<String, String>();

  static {
    HTML_ENTITIES.put("&nbsp;", "\u00A0");
    HTML_ENTITIES.put("&iexcl;", "\u00A1");
    HTML_ENTITIES.put("&cent;", "\u00A2");
    HTML_ENTITIES.put("&pound;", "\u00A3");
    HTML_ENTITIES.put("&curren;", "\u00A4");
    HTML_ENTITIES.put("&yen;", "\u00A5");
    HTML_ENTITIES.put("&brvbar;", "\u00A6");
    HTML_ENTITIES.put("&sect;", "\u00A7");
    HTML_ENTITIES.put("&uml;", "\u00A8");
    HTML_ENTITIES.put("&copy;", "\u00A9");
    HTML_ENTITIES.put("&ordf;", "\u00AA");
    HTML_ENTITIES.put("&laquo;", "\u00AB");
    HTML_ENTITIES.put("&not;", "\u00AC");
    HTML_ENTITIES.put("&shy;", "\u00AD");
    HTML_ENTITIES.put("&reg;", "\u00AE");
    HTML_ENTITIES.put("&macr;", "\u00AF");
    HTML_ENTITIES.put("&deg;", "\u00B0");
    HTML_ENTITIES.put("&plusmn;", "\u00B1");
    HTML_ENTITIES.put("&sup2;", "\u00B2");
    HTML_ENTITIES.put("&sup3;", "\u00B3");
    HTML_ENTITIES.put("&acute;", "\u00B4");
    HTML_ENTITIES.put("&micro;", "\u00B5");
    HTML_ENTITIES.put("&para;", "\u00B6");
    HTML_ENTITIES.put("&middot;", "\u00B7");
    HTML_ENTITIES.put("&cedil;", "\u00B8");
    HTML_ENTITIES.put("&sup1;", "\u00B9");
    HTML_ENTITIES.put("&ordm;", "\u00BA");
    HTML_ENTITIES.put("&raquo;", "\u00BB");
    HTML_ENTITIES.put("&frac14;", "\u00BC");
    HTML_ENTITIES.put("&frac12;", "\u00BD");
    HTML_ENTITIES.put("&frac34;", "\u00BE");
    HTML_ENTITIES.put("&iquest;", "\u00BF");
    HTML_ENTITIES.put("&Agrave;", "\u00C0");
    HTML_ENTITIES.put("&Aacute;", "\u00C1");
    HTML_ENTITIES.put("&Acirc;", "\u00C2");
    HTML_ENTITIES.put("&Atilde;", "\u00C3");
    HTML_ENTITIES.put("&Auml;", "\u00C4");
    HTML_ENTITIES.put("&Aring;", "\u00C5");
    HTML_ENTITIES.put("&AElig;", "\u00C6");
    HTML_ENTITIES.put("&Ccedil;", "\u00C7");
    HTML_ENTITIES.put("&Egrave;", "\u00C8");
    HTML_ENTITIES.put("&Eacute;", "\u00C9");
    HTML_ENTITIES.put("&Ecirc;", "\u00CA");
    HTML_ENTITIES.put("&Euml;", "\u00CB");
    HTML_ENTITIES.put("&Igrave;", "\u00CC");
    HTML_ENTITIES.put("&Iacute;", "\u00CD");
    HTML_ENTITIES.put("&Icirc;", "\u00CE");
    HTML_ENTITIES.put("&Iuml;", "\u00CF");
    HTML_ENTITIES.put("&ETH;", "\u00D0");
    HTML_ENTITIES.put("&Ntilde;", "\u00D1");
    HTML_ENTITIES.put("&Ograve;", "\u00D2");
    HTML_ENTITIES.put("&Oacute;", "\u00D3");
    HTML_ENTITIES.put("&Ocirc;", "\u00D4");
    HTML_ENTITIES.put("&Otilde;", "\u00D5");
    HTML_ENTITIES.put("&Ouml;", "\u00D6");
    HTML_ENTITIES.put("&times;", "\u00D7");
    HTML_ENTITIES.put("&Oslash;", "\u00D8");
    HTML_ENTITIES.put("&Ugrave;", "\u00D9");
    HTML_ENTITIES.put("&Uacute;", "\u00DA");
    HTML_ENTITIES.put("&Ucirc;", "\u00DB");
    HTML_ENTITIES.put("&Uuml;", "\u00DC");
    HTML_ENTITIES.put("&Yacute;", "\u00DD");
    HTML_ENTITIES.put("&THORN;", "\u00DE");
    HTML_ENTITIES.put("&szlig;", "\u00DF");
    HTML_ENTITIES.put("&agrave;", "\u00E0");
    HTML_ENTITIES.put("&aacute;", "\u00E1");
    HTML_ENTITIES.put("&acirc;", "\u00E2");
    HTML_ENTITIES.put("&atilde;", "\u00E3");
    HTML_ENTITIES.put("&auml;", "\u00E4");
    HTML_ENTITIES.put("&aring;", "\u00E5");
    HTML_ENTITIES.put("&aelig;", "\u00E6");
    HTML_ENTITIES.put("&ccedil;", "\u00E7");
    HTML_ENTITIES.put("&egrave;", "\u00E8");
    HTML_ENTITIES.put("&eacute;", "\u00E9");
    HTML_ENTITIES.put("&ecirc;", "\u00EA");
    HTML_ENTITIES.put("&euml;", "\u00EB");
    HTML_ENTITIES.put("&igrave;", "\u00EC");
    HTML_ENTITIES.put("&iacute;", "\u00ED");
    HTML_ENTITIES.put("&icirc;", "\u00EE");
    HTML_ENTITIES.put("&iuml;", "\u00EF");
    HTML_ENTITIES.put("&eth;", "\u00F0");
    HTML_ENTITIES.put("&ntilde;", "\u00F1");
    HTML_ENTITIES.put("&ograve;", "\u00F2");
    HTML_ENTITIES.put("&oacute;", "\u00F3");
    HTML_ENTITIES.put("&ocirc;", "\u00F4");
    HTML_ENTITIES.put("&otilde;", "\u00F5");
    HTML_ENTITIES.put("&ouml;", "\u00F6");
    HTML_ENTITIES.put("&divide;", "\u00F7");
    HTML_ENTITIES.put("&oslash;", "\u00F8");
    HTML_ENTITIES.put("&ugrave;", "\u00F9");
    HTML_ENTITIES.put("&uacute;", "\u00FA");
    HTML_ENTITIES.put("&ucirc;", "\u00FB");
    HTML_ENTITIES.put("&uuml;", "\u00FC");
    HTML_ENTITIES.put("&yacute;", "\u00FD");
    HTML_ENTITIES.put("&thorn;", "\u00FE");
    HTML_ENTITIES.put("&yuml;", "\u00FF");
    HTML_ENTITIES.put("&OElig;", "\u0152");
    HTML_ENTITIES.put("&oelig;", "\u0153");
    HTML_ENTITIES.put("&Scaron;", "\u0160");
    HTML_ENTITIES.put("&scaron;", "\u0161");
    HTML_ENTITIES.put("&Yuml;", "\u0178");
    HTML_ENTITIES.put("&fnof;", "\u0192");
    HTML_ENTITIES.put("&circ;", "\u02C6");
    HTML_ENTITIES.put("&tilde;", "\u02DC");
    HTML_ENTITIES.put("&Alpha;", "\u0391");
    HTML_ENTITIES.put("&Beta;", "\u0392");
    HTML_ENTITIES.put("&Gamma;", "\u0393");
    HTML_ENTITIES.put("&Delta;", "\u0394");
    HTML_ENTITIES.put("&Epsilon;", "\u0395");
    HTML_ENTITIES.put("&Zeta;", "\u0396");
    HTML_ENTITIES.put("&Eta;", "\u0397");
    HTML_ENTITIES.put("&Theta;", "\u0398");
    HTML_ENTITIES.put("&Iota;", "\u0399");
    HTML_ENTITIES.put("&Kappa;", "\u039A");
    HTML_ENTITIES.put("&Lambda;", "\u039B");
    HTML_ENTITIES.put("&Mu;", "\u039C");
    HTML_ENTITIES.put("&Nu;", "\u039D");
    HTML_ENTITIES.put("&Xi;", "\u039E");
    HTML_ENTITIES.put("&Omicron;", "\u039F");
    HTML_ENTITIES.put("&Pi;", "\u03A0");
    HTML_ENTITIES.put("&Rho;", "\u03A1");
    HTML_ENTITIES.put("&Sigma;", "\u03A3");
    HTML_ENTITIES.put("&Tau;", "\u03A4");
    HTML_ENTITIES.put("&Upsilon;", "\u03A5");
    HTML_ENTITIES.put("&Phi;", "\u03A6");
    HTML_ENTITIES.put("&Chi;", "\u03A7");
    HTML_ENTITIES.put("&Psi;", "\u03A8");
    HTML_ENTITIES.put("&Omega;", "\u03A9");
    HTML_ENTITIES.put("&alpha;", "\u03B1");
    HTML_ENTITIES.put("&beta;", "\u03B2");
    HTML_ENTITIES.put("&gamma;", "\u03B3");
    HTML_ENTITIES.put("&delta;", "\u03B4");
    HTML_ENTITIES.put("&epsilon;", "\u03B5");
    HTML_ENTITIES.put("&zeta;", "\u03B6");
    HTML_ENTITIES.put("&eta;", "\u03B7");
    HTML_ENTITIES.put("&theta;", "\u03B8");
    HTML_ENTITIES.put("&iota;", "\u03B9");
    HTML_ENTITIES.put("&kappa;", "\u03BA");
    HTML_ENTITIES.put("&lambda;", "\u03BB");
    HTML_ENTITIES.put("&mu;", "\u03BC");
    HTML_ENTITIES.put("&nu;", "\u03BD");
    HTML_ENTITIES.put("&xi;", "\u03BE");
    HTML_ENTITIES.put("&omicron;", "\u03BF");
    HTML_ENTITIES.put("&pi;", "\u03C0");
    HTML_ENTITIES.put("&rho;", "\u03C1");
    HTML_ENTITIES.put("&sigmaf;", "\u03C2");
    HTML_ENTITIES.put("&sigma;", "\u03C3");
    HTML_ENTITIES.put("&tau;", "\u03C4");
    HTML_ENTITIES.put("&upsilon;", "\u03C5");
    HTML_ENTITIES.put("&phi;", "\u03C6");
    HTML_ENTITIES.put("&chi;", "\u03C7");
    HTML_ENTITIES.put("&psi;", "\u03C8");
    HTML_ENTITIES.put("&omega;", "\u03C9");
    HTML_ENTITIES.put("&thetasym;", "\u03D1");
    HTML_ENTITIES.put("&upsih;", "\u03D2");
    HTML_ENTITIES.put("&piv;", "\u03D6");
    HTML_ENTITIES.put("&ensp;", "\u2002");
    HTML_ENTITIES.put("&emsp;", "\u2003");
    HTML_ENTITIES.put("&thinsp;", "\u2009");
    HTML_ENTITIES.put("&zwnj;", "\u200C");
    HTML_ENTITIES.put("&zwj;", "\u200D");
    HTML_ENTITIES.put("&lrm;", "\u200E");
    HTML_ENTITIES.put("&rlm;", "\u200F");
    HTML_ENTITIES.put("&ndash;", "\u2013");
    HTML_ENTITIES.put("&mdash;", "\u2014");
    HTML_ENTITIES.put("&lsquo;", "\u2018");
    HTML_ENTITIES.put("&rsquo;", "\u2019");
    HTML_ENTITIES.put("&sbquo;", "\u201A");
    HTML_ENTITIES.put("&ldquo;", "\u201C");
    HTML_ENTITIES.put("&rdquo;", "\u201D");
    HTML_ENTITIES.put("&bdquo;", "\u201E");
    HTML_ENTITIES.put("&dagger;", "\u2020");
    HTML_ENTITIES.put("&Dagger;", "\u2021");
    HTML_ENTITIES.put("&bull;", "\u2022");
    HTML_ENTITIES.put("&hellip;", "\u2026");
    HTML_ENTITIES.put("&permil;", "\u2030");
    HTML_ENTITIES.put("&prime;", "\u2032");
    HTML_ENTITIES.put("&Prime;", "\u2033");
    HTML_ENTITIES.put("&lsaquo;", "\u2039");
    HTML_ENTITIES.put("&rsaquo;", "\u203A");
    HTML_ENTITIES.put("&oline;", "\u203E");
    HTML_ENTITIES.put("&frasl;", "\u2044");
    HTML_ENTITIES.put("&euro;", "\u20AC");
    HTML_ENTITIES.put("&image;", "\u2111");
    HTML_ENTITIES.put("&weierp;", "\u2118");
    HTML_ENTITIES.put("&real;", "\u211C");
    HTML_ENTITIES.put("&trade;", "\u2122");
    HTML_ENTITIES.put("&alefsym;", "\u2135");
    HTML_ENTITIES.put("&larr;", "\u2190");
    HTML_ENTITIES.put("&uarr;", "\u2191");
    HTML_ENTITIES.put("&rarr;", "\u2192");
    HTML_ENTITIES.put("&darr;", "\u2193");
    HTML_ENTITIES.put("&harr;", "\u2194");
    HTML_ENTITIES.put("&crarr;", "\u21B5");
    HTML_ENTITIES.put("&lArr;", "\u21D0");
    HTML_ENTITIES.put("&uArr;", "\u21D1");
    HTML_ENTITIES.put("&rArr;", "\u21D2");
    HTML_ENTITIES.put("&dArr;", "\u21D3");
    HTML_ENTITIES.put("&hArr;", "\u21D4");
    HTML_ENTITIES.put("&forall;", "\u2200");
    HTML_ENTITIES.put("&part;", "\u2202");
    HTML_ENTITIES.put("&exist;", "\u2203");
    HTML_ENTITIES.put("&empty;", "\u2205");
    HTML_ENTITIES.put("&nabla;", "\u2207");
    HTML_ENTITIES.put("&isin;", "\u2208");
    HTML_ENTITIES.put("&notin;", "\u2209");
    HTML_ENTITIES.put("&ni;", "\u220B");
    HTML_ENTITIES.put("&prod;", "\u220F");
    HTML_ENTITIES.put("&sum;", "\u2211");
    HTML_ENTITIES.put("&minus;", "\u2212");
    HTML_ENTITIES.put("&lowast;", "\u2217");
    HTML_ENTITIES.put("&radic;", "\u221A");
    HTML_ENTITIES.put("&prop;", "\u221D");
    HTML_ENTITIES.put("&infin;", "\u221E");
    HTML_ENTITIES.put("&ang;", "\u2220");
    HTML_ENTITIES.put("&and;", "\u2227");
    HTML_ENTITIES.put("&or;", "\u2228");
    HTML_ENTITIES.put("&cap;", "\u2229");
    HTML_ENTITIES.put("&cup;", "\u222A");
    HTML_ENTITIES.put("&int;", "\u222B");
    HTML_ENTITIES.put("&there4;", "\u2234");
    HTML_ENTITIES.put("&sim;", "\u223C");
    HTML_ENTITIES.put("&cong;", "\u2245");
    HTML_ENTITIES.put("&asymp;", "\u2248");
    HTML_ENTITIES.put("&ne;", "\u2260");
    HTML_ENTITIES.put("&equiv;", "\u2261");
    HTML_ENTITIES.put("&le;", "\u2264");
    HTML_ENTITIES.put("&ge;", "\u2265");
    HTML_ENTITIES.put("&sub;", "\u2282");
    HTML_ENTITIES.put("&sup;", "\u2283");
    HTML_ENTITIES.put("&nsub;", "\u2284");
    HTML_ENTITIES.put("&sube;", "\u2286");
    HTML_ENTITIES.put("&supe;", "\u2287");
    HTML_ENTITIES.put("&oplus;", "\u2295");
    HTML_ENTITIES.put("&otimes;", "\u2297");
    HTML_ENTITIES.put("&perp;", "\u22A5");
    HTML_ENTITIES.put("&sdot;", "\u22C5");
    HTML_ENTITIES.put("&lceil;", "\u2308");
    HTML_ENTITIES.put("&rceil;", "\u2309");
    HTML_ENTITIES.put("&lfloor;", "\u230A");
    HTML_ENTITIES.put("&rfloor;", "\u230B");
    HTML_ENTITIES.put("&lang;", "\u2329");
    HTML_ENTITIES.put("&rang;", "\u232A");
    HTML_ENTITIES.put("&loz;", "\u25CA");
    HTML_ENTITIES.put("&spades;", "\u2660");
    HTML_ENTITIES.put("&clubs;", "\u2663");
    HTML_ENTITIES.put("&hearts;", "\u2665");
    HTML_ENTITIES.put("&diams;", "\u2666");
  }

  /**
   * Filters out characters that have meaning within JSP and HTML, and
   * replaces them with "escaped" versions. The characters {@code <},
   * {@code >}, {@code &} and {@code "} are escaped; every other character
   * (including the single quote) is copied unchanged.
   *
   * @param s the String to filter
   * @return the filtered String, or {@code null} if {@code s} is {@code null}
   */
  public static String transformHTML(String s) {
    if (s == null) {
      return null;
    }

    int length = s.length();
    StringBuilder escaped = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
      char c = s.charAt(i);
      switch (c) {
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '&':
          escaped.append("&amp;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        default:
          escaped.append(c);
      }
    }

    return escaped.toString();
  }

  /**
   * Transforms the given, already escaped, String into a subset of HTML
   * displayable on a web page. The input is expected to have been passed
   * through {@link #transformHTML(String)} first; escaped occurrences of
   * &lt;b&gt;, &lt;strong&gt;, &lt;i&gt;, &lt;em&gt;, &lt;blockquote&gt;,
   * &lt;br&gt;, &lt;p&gt;, &lt;pre&gt;, &lt;ul&gt;, &lt;ol&gt;, &lt;li&gt;,
   * &lt;sup&gt;, &lt;sub&gt; and &lt;a href&gt; (and their corresponding end
   * tags) are turned back into real tags. Anchors keep only their href
   * attribute, and only when {@link #validateUrl(String)} accepts the URL.
   * Finally, double-escaped entities such as {@code &amp;copy;} are restored
   * to single entities.
   *
   * @param s the escaped String to transform
   * @return the transformed String, or {@code null} if {@code s} is {@code null}
   */
  public static String transformToHTMLSubset(String s) {
    if (s == null) {
      return null;
    }

    s = replace(s, OPENING_B_TAG_PATTERN, "<b>");
    s = replace(s, CLOSING_B_TAG_PATTERN, "</b>");
    s = replace(s, OPENING_STRONG_TAG_PATTERN, "<strong>");
    s = replace(s, CLOSING_STRONG_TAG_PATTERN, "</strong>");
    s = replace(s, OPENING_I_TAG_PATTERN, "<i>");
    s = replace(s, CLOSING_I_TAG_PATTERN, "</i>");
    s = replace(s, OPENING_EM_TAG_PATTERN, "<em>");
    s = replace(s, CLOSING_EM_TAG_PATTERN, "</em>");
    s = replace(s, OPENING_BLOCKQUOTE_TAG_PATTERN, "<blockquote>");
    s = replace(s, CLOSING_BLOCKQUOTE_TAG_PATTERN, "</blockquote>");
    s = replace(s, BR_TAG_PATTERN, "<br />");
    s = replace(s, OPENING_P_TAG_PATTERN, "<p>");
    s = replace(s, CLOSING_P_TAG_PATTERN, "</p>");
    s = replace(s, OPENING_PRE_TAG_PATTERN, "<pre>");
    s = replace(s, CLOSING_PRE_TAG_PATTERN, "</pre>");
    s = replace(s, OPENING_UL_TAG_PATTERN, "<ul>");
    s = replace(s, CLOSING_UL_TAG_PATTERN, "</ul>");
    s = replace(s, OPENING_OL_TAG_PATTERN, "<ol>");
    s = replace(s, CLOSING_OL_TAG_PATTERN, "</ol>");
    s = replace(s, OPENING_LI_TAG_PATTERN, "<li>");
    s = replace(s, CLOSING_LI_TAG_PATTERN, "</li>");
    s = replace(s, OPENING_SUP_TAG_PATTERN, "<sup>");
    s = replace(s, CLOSING_SUP_TAG_PATTERN, "</sup>");
    s = replace(s, OPENING_SUB_TAG_PATTERN, "<sub>");
    s = replace(s, CLOSING_SUB_TAG_PATTERN, "</sub>");

    // HTTP links - remove all attributes other than href
    s = replace(s, CLOSING_A_TAG_PATTERN, "</a>");
    s = rewriteAnchors(s);

    // escaped angle brackets and other allowed entities; the two literal
    // replacements run first so their result is never re-examined as "&amp;"
    s = s.replace("&amp;lt;", "&lt;");
    s = s.replace("&amp;gt;", "&gt;");
    return replace(s, ESCAPED_ENTITY_PATTERN, "&$1;");
  }

  /**
   * Rewrites every escaped opening anchor tag in {@code s} as a real
   * {@code <a href=...>} tag that carries nothing but a validated href.
   */
  private static String rewriteAnchors(String s) {
    Matcher m = OPENING_A_TAG_PATTERN.matcher(s);
    if (!m.find()) {
      return s;
    }

    StringBuilder out = new StringBuilder(s.length());
    int position = 0;
    do {
      out.append(s, position, m.start()).append("<a href=");
      appendHref(out, m.group());
      out.append('>');
      position = m.end();
    } while (m.find());

    return out.append(s, position, s.length()).toString();
  }

  /**
   * Appends the quoted, validated href value found in the escaped anchor tag
   * {@code link}. An empty double-quoted value is written when the tag has no
   * quoted href or its closing quote is missing, so malformed input can
   * neither throw nor produce a dangling {@code href=}.
   */
  private static void appendHref(StringBuilder out, String link) {
    String quote;
    String closingToken;
    int valueStart;

    int hrefIndex = link.indexOf(DOUBLE_QUOTED_HREF);
    if (hrefIndex > -1) {
      quote = "\"";
      closingToken = ESCAPED_DOUBLE_QUOTE;
      valueStart = hrefIndex + DOUBLE_QUOTED_HREF.length();
    } else {
      hrefIndex = link.indexOf(SINGLE_QUOTED_HREF);
      if (hrefIndex == -1) {
        out.append("\"\"");
        return;
      }
      quote = "'";
      closingToken = "'";
      valueStart = hrefIndex + SINGLE_QUOTED_HREF.length();
    }

    int valueEnd = link.indexOf(closingToken, valueStart);
    String url = (valueEnd > -1) ? validateUrl(link.substring(valueStart, valueEnd)) : "";
    out.append(quote).append(url).append(quote);
  }

  /** Replaces every match of {@code pattern} in {@code string} with {@code replacement}. */
  private static String replace(String string, Pattern pattern, String replacement) {
    return pattern.matcher(string).replaceAll(replacement);
  }

  /**
   * Filters out carriage return characters ({@code \r}). Line feed
   * characters are left in place, so {@code "\r\n"} becomes {@code "\n"}.
   *
   * @param s the String to filter
   * @return the filtered String, or {@code null} if {@code s} is {@code null}
   */
  public static String filterNewlines(String s) {
    if (s == null) {
      return null;
    }

    return s.replace("\r", "");
  }

  /**
   * Filters out all HTML tags and comments, together with the literal
   * entities {@code &lt;}, {@code &gt;} and {@code &nbsp;}.
   *
   * @param s the String to filter
   * @return the filtered String, or {@code null} if {@code s} is {@code null}
   */
  public static String filterHTML(String s) {
    if (s == null) {
      return null;
    }

    String text = s.replace("&lt;", "").replace("&gt;", "").replace("&nbsp;", "");
    // comments first: a comment may contain '>' and would otherwise be cut short
    text = HTML_COMMENT_PATTERN.matcher(text).replaceAll("");
    return HTML_TAG_PATTERN.matcher(text).replaceAll("");
  }

  /**
   * Strips HTML from the given String and truncates it to
   * {@link #MAX_CONTENT_LENGTH} characters.
   *
   * @param s the String to truncate
   * @return the truncated text, or an empty String if {@code s} is {@code null}
   * @see #truncate(String, int)
   */
  public static String truncate(String s) {
    return truncate(s, MAX_CONTENT_LENGTH);
  }

  /**
   * Strips HTML from the given String and then truncates it on a word
   * boundary. Words are copied until adding the next one would exceed
   * {@code maxLength}, at which point "..." is appended and the result is
   * returned. A word longer than {@link #MAX_WORD_LENGTH} is cut to that
   * length, followed by "...", and also ends the result.
   *
   * @param s the String to truncate
   * @param maxLength the maximum length of the accumulated text
   * @return the truncated text, or an empty String if {@code s} is {@code null}
   */
  public static String truncate(String s, int maxLength) {
    String content = filterHTML(s);
    if (content == null) {
      return "";
    }

    StringBuilder buf = new StringBuilder();
    String[] words = WHITESPACE_PATTERN.split(content);
    for (int i = 0; i < words.length; i++) {
      String word = words[i];
      if (buf.length() + word.length() > maxLength) {
        // no room for this word
        return buf.append("...").toString();
      }
      if (word.length() > MAX_WORD_LENGTH) {
        // over-long word: keep its head and stop
        return buf.append(word.substring(0, MAX_WORD_LENGTH)).append("...").toString();
      }
      buf.append(word);
      if (i + 1 < words.length) {
        buf.append(" ");
      }
    }

    return buf.toString();
  }

  /**
   * Removes script elements (including their content) and self-closing
   * script tags from the given HTML. Matching ignores letter case, and a
   * script element may span several lines.
   *
   * @param html the HTML to clean
   * @return the HTML without script tags, or {@code null} if {@code html} is {@code null}
   */
  public static String stripScriptTags(String html) {
    if (html == null) {
      return html;
    }

    String cleaned = SCRIPT_ELEMENT_PATTERN.matcher(html).replaceAll("");
    return SELF_CLOSING_SCRIPT_PATTERN.matcher(cleaned).replaceAll("");
  }

  /**
   * Replaces the named entities in the entity table with the characters they
   * stand for. Entities that are not in the table (for example
   * {@code &amp;} or {@code &lt;}) are left untouched.
   *
   * @param source the text to unescape
   * @return the unescaped text, or {@code null} if {@code source} is {@code null}
   */
  public static String unescapeHTMLEntities(String source) {
    if (source == null) {
      return null;
    }

    Matcher m = NAMED_ENTITY_PATTERN.matcher(source);
    // Matcher.appendReplacement only accepts a StringBuffer on older JDKs
    StringBuffer out = new StringBuffer(source.length());
    while (m.find()) {
      String entity = m.group();
      String character = HTML_ENTITIES.get(entity);
      m.appendReplacement(out, Matcher.quoteReplacement(character != null ? character : entity));
    }
    m.appendTail(out);
    return out.toString();
  }

  /**
   * Checks the given URL against a whitelist of schemes (https, http, ftp
   * and mailto). The scheme is compared case-insensitively.
   *
   * @param url the URL to check
   * @return {@code url} unchanged if it starts with an allowed scheme,
   *         otherwise (including for {@code null}) an empty String
   */
  public static String validateUrl(String url) {
    if (url == null) {
      return "";
    }

    // whitelist, don't blacklist.
    for (String scheme : ALLOWED_SCHEMES) {
      if (url.regionMatches(true, 0, scheme, 0, scheme.length())) {
        return url;
      }
    }
    return "";
  }

}