Coverage Report - net.sourceforge.pebble.event.response.ContentSpamListener
 
Classes in this File Line Coverage Branch Coverage Complexity
ContentSpamListener
92%
39/42
86%
19/22
8
 
 1  
 /*
 2  
  * Copyright (c) 2003-2011, Simon Brown
 3  
  * All rights reserved.
 4  
  *
 5  
  * Redistribution and use in source and binary forms, with or without
 6  
  * modification, are permitted provided that the following conditions are met:
 7  
  *
 8  
  *   - Redistributions of source code must retain the above copyright
 9  
  *     notice, this list of conditions and the following disclaimer.
 10  
  *
 11  
  *   - Redistributions in binary form must reproduce the above copyright
 12  
  *     notice, this list of conditions and the following disclaimer in
 13  
  *     the documentation and/or other materials provided with the
 14  
  *     distribution.
 15  
  *
 16  
  *   - Neither the name of Pebble nor the names of its contributors may
 17  
  *     be used to endorse or promote products derived from this software
 18  
  *     without specific prior written permission.
 19  
  *
 20  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 21  
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22  
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23  
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 24  
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 25  
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 26  
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 27  
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 28  
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 29  
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 30  
  * POSSIBILITY OF SUCH DAMAGE.
 31  
  */
 32  
 package net.sourceforge.pebble.event.response;
 33  
 
 34  
 import net.sourceforge.pebble.PluginProperties;
 35  
 import net.sourceforge.pebble.domain.Response;
 36  
 import org.apache.commons.logging.Log;
 37  
 import org.apache.commons.logging.LogFactory;
 38  
 
 39  
 import java.util.regex.Matcher;
 40  
 import java.util.regex.Pattern;
 41  
 
 42  
 /**
 43  
  * Checks comment and TrackBack content for specified regexs and sets
 44  
  * the state of such responses to pending. This includes all user-definable
 45  
  * fields of the response : title, content, source name and source link. The
 46  
  * spam score is incremented for each field that exceeds the threshold. 
 47  
  *
 48  
  * @author Simon Brown
 49  
  */
 50  5508
 public class ContentSpamListener extends BlogEntryResponseListenerSupport {
 51  
 
 52  
   /** the log used by this class */
 53  4
   private static final Log log = LogFactory.getLog(ContentSpamListener.class);
 54  
 
 55  
   /** the default regex list */
 56  
   public static final String DEFAULT_REGEX_LIST = "cialis, viagra, poker, casino, xanax, holdem, hold-em, hold em, sex, craps, fuck, shit, teenage, phentermine, blackjack, roulette, gambling, pharmacy, carisoprodol, pills, penis, penis enlargement, anal, hentai, anime, vicodin, massage, nude, ejaculation, porn, gay, naked, girl, teens, babe, masturbating, squirt, incest, fetish, discount, cheap, interesdting, levitra, government, grants, loan, &\\#.*;, kasino, slots, play, bingo, mortgage, baccarat";
 57  
   
 58  
   /** the default threshold for the number of content matches */
 59  
   public static final int DEFAULT_THRESHOLD = 0;
 60  
 
 61  
   /** the name of the regex list property */
 62  
   public static final String REGEX_LIST_KEY = "ContentSpamListener.regexList";
 63  
 
 64  
   /** the name of the threshold property */
 65  
   public static final String THRESHOLD_KEY = "ContentSpamListener.threshold";
 66  
 
 67  
   /**
 68  
    * Called when a comment or TrackBack has been added.
 69  
    *
 70  
    * @param response a Response
 71  
    */
 72  
   protected void blogEntryResponseAdded(Response response) {
 73  316
     PluginProperties props = response.getBlogEntry().getBlog().getPluginProperties();
 74  316
     String regexList = props.getProperty(REGEX_LIST_KEY);
 75  316
     String regexes[] = null;
 76  316
     if (regexList != null) {
 77  316
       regexes = regexList.split(",");
 78  
     } else {
 79  0
       regexes = new String[0];
 80  
     }
 81  
 
 82  316
     int threshold = DEFAULT_THRESHOLD;
 83  316
     if (props.hasProperty(THRESHOLD_KEY)) {
 84  
       try {
 85  316
         threshold = Integer.parseInt(props.getProperty(THRESHOLD_KEY));
 86  0
       } catch (NumberFormatException nfe) {
 87  0
         log.error(nfe.getMessage());
 88  
         // do nothing, the value has already been defaulted
 89  316
       }
 90  
     }
 91  
 
 92  
     // check each of the fields in turn
 93  316
     if (!contentWithinThreshold(response.getTitle(), regexes, threshold)) {
 94  16
       log.info(response.getTitle() + " marked as pending : threshold for title exceeded");
 95  16
       response.setPending();
 96  16
       response.incrementSpamScore();
 97  
     }
 98  316
     if (!contentWithinThreshold(response.getSourceName(), regexes, threshold)) {
 99  16
       log.info(response.getTitle() + " marked as pending : threshold for source name exceeded");
 100  16
       response.setPending();
 101  16
       response.incrementSpamScore();
 102  
     }
 103  316
     if (!contentWithinThreshold(response.getSourceLink(), regexes, threshold)) {
 104  16
       log.info(response.getTitle() + " marked as pending : threshold for source link exceeded");
 105  16
       response.setPending();
 106  16
       response.incrementSpamScore();
 107  
     }
 108  316
     if (!contentWithinThreshold(response.getContent(), regexes, threshold)) {
 109  32
       log.info(response.getTitle() + " marked as pending : threshold for content exceeded");
 110  32
       response.setPending();
 111  32
       response.incrementSpamScore();
 112  
     }
 113  316
   }
 114  
 
 115  
   private boolean contentWithinThreshold(String content, String regexes[], int threshold) {
 116  1264
     if (content == null || content.trim().length() == 0) {
 117  8
       return true;
 118  
     }
 119  
 
 120  1256
     int count = 0;
 121  41480
     for (int i = 0; i < regexes.length; i++) {
 122  40304
       Pattern p = Pattern.compile(regexes[i].trim(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
 123  40304
       Matcher m = p.matcher(content);
 124  40320
       while (m.find()) {
 125  96
         count++;
 126  
 
 127  96
         if (count > threshold) {
 128  80
           return false;
 129  
         }
 130  
       }
 131  
     }
 132  
 
 133  1176
     return true;
 134  
   }
 135  
 
 136  
 }