Coverage Report - net.sourceforge.pebble.index.SearchIndex
 
Classes in this File Line Coverage Branch Coverage Complexity
SearchIndex
58%
106/180
52%
23/44
4
 
 1  
 /*
 2  
  * Copyright (c) 2003-2011, Simon Brown
 3  
  * All rights reserved.
 4  
  *
 5  
  * Redistribution and use in source and binary forms, with or without
 6  
  * modification, are permitted provided that the following conditions are met:
 7  
  *
 8  
  *   - Redistributions of source code must retain the above copyright
 9  
  *     notice, this list of conditions and the following disclaimer.
 10  
  *
 11  
  *   - Redistributions in binary form must reproduce the above copyright
 12  
  *     notice, this list of conditions and the following disclaimer in
 13  
  *     the documentation and/or other materials provided with the
 14  
  *     distribution.
 15  
  *
 16  
  *   - Neither the name of Pebble nor the names of its contributors may
 17  
  *     be used to endorse or promote products derived from this software
 18  
  *     without specific prior written permission.
 19  
  *
 20  
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 21  
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22  
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23  
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 24  
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 25  
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 26  
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 27  
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 28  
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 29  
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 30  
  * POSSIBILITY OF SUCH DAMAGE.
 31  
  */
 32  
 package net.sourceforge.pebble.index;
 33  
 
 34  
 import net.sourceforge.pebble.domain.*;
 35  
 import net.sourceforge.pebble.search.SearchException;
 36  
 import net.sourceforge.pebble.search.SearchHit;
 37  
 import net.sourceforge.pebble.search.SearchResults;
 38  
 import org.apache.commons.logging.Log;
 39  
 import org.apache.commons.logging.LogFactory;
 40  
 import org.apache.lucene.analysis.Analyzer;
 41  
 import org.apache.lucene.document.DateField;
 42  
 import org.apache.lucene.document.Document;
 43  
 import org.apache.lucene.document.Field;
 44  
 import org.apache.lucene.index.IndexReader;
 45  
 import org.apache.lucene.index.IndexWriter;
 46  
 import org.apache.lucene.index.Term;
 47  
 import org.apache.lucene.queryParser.ParseException;
 48  
 import org.apache.lucene.queryParser.QueryParser;
 49  
 import org.apache.lucene.search.Hits;
 50  
 import org.apache.lucene.search.IndexSearcher;
 51  
 import org.apache.lucene.search.Query;
 52  
 import org.apache.lucene.search.Searcher;
 53  
 
 54  
 import java.io.File;
 55  
 import java.io.IOException;
 56  
 import java.util.Iterator;
 57  
 import java.util.List;
 58  
 import java.util.Collection;
 59  
 
 60  
 /**
 61  
  * Wraps up the functionality to index blog entries. This is really just
 62  
  * a convenient wrapper around Lucene.
 63  
  *
 64  
  * @author    Simon Brown
 65  
  */
 66  
 public class SearchIndex {
 67  
 
 68  
   /** the log used by this class */
 69  4
   private static final Log log = LogFactory.getLog(SearchIndex.class);
 70  
 
 71  
   private final Blog blog;
 72  
 
 73  2740
   public SearchIndex(Blog blog) {
 74  2740
     this.blog = blog;
 75  2740
   }
 76  
 
 77  
   /**
 78  
    * Clears the index.
 79  
    */
 80  
   public void clear() {
 81  56
     File searchDirectory = new File(blog.getSearchIndexDirectory());
 82  56
     if (!searchDirectory.exists()) {
 83  52
       searchDirectory.mkdirs();
 84  
     }
 85  
 
 86  56
     synchronized (blog) {
 87  
       try {
 88  56
         Analyzer analyzer = getAnalyzer();
 89  56
         IndexWriter writer = new IndexWriter(searchDirectory, analyzer, true);
 90  56
         writer.close();
 91  0
       } catch (Exception e) {
 92  0
         log.error(e.getMessage(), e);
 93  56
       }
 94  56
     }
 95  56
   }
 96  
 
 97  
   /**
 98  
    * Allows a collection of blog entries to be indexed.
 99  
    */
 100  
   public void indexBlogEntries(Collection<BlogEntry> blogEntries) {
 101  36
     synchronized (blog) {
 102  
       try {
 103  36
         Analyzer analyzer = getAnalyzer();
 104  36
         IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
 105  
 
 106  36
         for (BlogEntry blogEntry : blogEntries) {
 107  36
           index(blogEntry, writer);
 108  
         }
 109  
 
 110  36
         writer.close();
 111  0
       } catch (Exception e) {
 112  0
         log.error(e.getMessage(), e);
 113  36
       }
 114  36
     }
 115  36
   }
 116  
 
 117  
   /**
 118  
    * Allows a collection of static pages to be indexed.
 119  
    */
 120  
   public void indexStaticPages(Collection<StaticPage> staticPages) {
 121  36
     synchronized (blog) {
 122  
       try {
 123  36
         Analyzer analyzer = getAnalyzer();
 124  36
         IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
 125  
 
 126  36
         for (StaticPage staticPage : staticPages) {
 127  0
           index(staticPage, writer);
 128  
         }
 129  
 
 130  36
         writer.close();
 131  0
       } catch (Exception e) {
 132  0
         log.error(e.getMessage(), e);
 133  36
       }
 134  36
     }
 135  36
   }
 136  
 
 137  
   /**
 138  
    * Allows a single blog entry to be (re)indexed. If the entry is already
 139  
    * indexed, this method deletes the previous index before adding the new
 140  
    * one.
 141  
    *
 142  
    * @param blogEntry   the BlogEntry instance to index
 143  
    */
 144  
   public void index(BlogEntry blogEntry) {
 145  
     try {
 146  228
       synchronized (blog) {
 147  
         // first delete the blog entry from the index (if it was there)
 148  228
         unindex(blogEntry);
 149  
 
 150  228
         Analyzer analyzer = getAnalyzer();
 151  228
         IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
 152  32
         index(blogEntry, writer);
 153  32
         writer.close();
 154  32
       }
 155  196
     } catch (Exception e) {
 156  196
       log.error(e.getMessage(), e);
 157  32
     }
 158  228
   }
 159  
 
 160  
   /**
 161  
    * Allows a single static page to be (re)indexed. If the page is already
 162  
    * indexed, this method deletes the previous index before adding the new
 163  
    * one.
 164  
    *
 165  
    * @param staticPage    the StaticPage instance to index
 166  
    */
 167  
   public void index(StaticPage staticPage) {
 168  
     try {
 169  0
       synchronized (blog) {
 170  
         // first delete the static page from the index (if it was there)
 171  0
         unindex(staticPage);
 172  
 
 173  0
         Analyzer analyzer = getAnalyzer();
 174  0
         IndexWriter writer = new IndexWriter(blog.getSearchIndexDirectory(), analyzer, false);
 175  0
         index(staticPage, writer);
 176  0
         writer.close();
 177  0
       }
 178  0
     } catch (Exception e) {
 179  0
       log.error(e.getMessage(), e);
 180  0
     }
 181  0
   }
 182  
 
 183  
   /**
 184  
    * Gets the Analyzer implementation to use.
 185  
    *
 186  
    * @return  an Analyzer instance
 187  
    * @throws Exception
 188  
    */
 189  
   private Analyzer getAnalyzer() throws Exception {
 190  412
     Class c = Class.forName(blog.getLuceneAnalyzer());
 191  412
     return (Analyzer)c.newInstance();
 192  
   }
 193  
 
 194  
   /**
 195  
    * Removes the index for a single blog entry to be removed.
 196  
    *
 197  
    * @param blogEntry   the BlogEntry instance to be removed
 198  
    */
 199  
   public void unindex(BlogEntry blogEntry) {
 200  
     try {
 201  276
       synchronized (blog) {
 202  276
         log.debug("Attempting to delete index for " + blogEntry.getTitle());
 203  276
         IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
 204  36
         Term term = new Term("id", blogEntry.getId());
 205  36
         log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
 206  36
         reader.close();
 207  36
       }
 208  240
     } catch (Exception e) {
 209  240
       log.error(e.getMessage(), e);
 210  36
     }
 211  276
   }
 212  
 
 213  
   /**
 214  
    * Removes the index for a single blog entry to be removed.
 215  
    *
 216  
    * @param staticPage    the StaticPage instance to be removed
 217  
    */
 218  
   public void unindex(StaticPage staticPage) {
 219  
     try {
 220  0
       synchronized (blog) {
 221  0
         log.debug("Attempting to delete index for " + staticPage.getTitle());
 222  0
         IndexReader reader = IndexReader.open(blog.getSearchIndexDirectory());
 223  0
         Term term = new Term("id", staticPage.getId());
 224  0
         log.debug("Deleted " + reader.delete(term) + " document(s) from the index");
 225  0
         reader.close();
 226  0
       }
 227  0
     } catch (Exception e) {
 228  0
       log.error(e.getMessage(), e);
 229  0
     }
 230  0
   }
 231  
 
 232  
   /**
 233  
    * Helper method to index an individual blog entry.
 234  
    *
 235  
    * @param blogEntry   the BlogEntry instance to index
 236  
    * @param writer      the IndexWriter to index with
 237  
    */
 238  
   private void index(BlogEntry blogEntry, IndexWriter writer) {
 239  68
     if (!blogEntry.isPublished()) {
 240  0
       return;
 241  
     }
 242  
 
 243  
     try {
 244  68
       log.debug("Indexing " + blogEntry.getTitle());
 245  68
       Document document = new Document();
 246  68
       document.add(Field.Keyword("id", blogEntry.getId()));
 247  68
       if (blogEntry.getTitle() != null) {
 248  68
         document.add(Field.Text("title", blogEntry.getTitle()));
 249  
       } else {
 250  0
         document.add(Field.Text("title", ""));
 251  
       }
 252  68
       document.add(Field.Keyword("permalink", blogEntry.getPermalink()));
 253  68
       document.add(Field.UnIndexed("date", DateField.dateToString(blogEntry.getDate())));
 254  68
       if (blogEntry.getBody() != null) {
 255  68
         document.add(Field.UnStored("body", blogEntry.getBody()));
 256  
       } else {
 257  0
         document.add(Field.UnStored("body", ""));
 258  
       }
 259  68
       if (blogEntry.getTruncatedContent() != null) {
 260  68
         document.add(Field.Text("truncatedBody", blogEntry.getTruncatedContent()));
 261  
       } else {
 262  0
         document.add(Field.Text("truncatedBody", ""));
 263  
       }
 264  
 
 265  68
       if (blogEntry.getAuthor() != null) {
 266  68
         document.add(Field.Text("author", blogEntry.getAuthor()));
 267  
       }
 268  
 
 269  
       // build up one large string with all searchable content
 270  
       // i.e. entry title, entry body and all response bodies
 271  68
       StringBuffer searchableContent = new StringBuffer();
 272  68
       searchableContent.append(blogEntry.getTitle());
 273  68
       searchableContent.append(" ");
 274  68
       searchableContent.append(blogEntry.getBody());
 275  
 
 276  68
       for (Category category : blogEntry.getCategories()) {
 277  12
         document.add(Field.Text("category", category.getId()));
 278  
       }
 279  
 
 280  68
       for (Tag tag : blogEntry.getAllTags()) {
 281  0
         document.add(Field.Text("tag", tag.getName()));
 282  
       }
 283  
 
 284  68
       searchableContent.append(" ");
 285  68
       Iterator it = blogEntry.getComments().iterator();
 286  92
       while (it.hasNext()) {
 287  24
         Comment comment = (Comment)it.next();
 288  24
         if (comment.isApproved()) {
 289  12
           searchableContent.append(comment.getBody());
 290  12
           searchableContent.append(" ");
 291  
         }
 292  24
       }
 293  68
       it = blogEntry.getTrackBacks().iterator();
 294  68
       while (it.hasNext()) {
 295  0
         TrackBack trackBack = (TrackBack)it.next();
 296  0
         if (trackBack.isApproved()) {
 297  0
           searchableContent.append(trackBack.getExcerpt());
 298  0
           searchableContent.append(" ");
 299  
         }
 300  0
       }
 301  
 
 302  
       // join the title and body together to make searching on them both easier
 303  68
       document.add(Field.UnStored("blogEntry", searchableContent.toString()));
 304  
 
 305  68
       writer.addDocument(document);
 306  0
     } catch (Exception e) {
 307  0
       log.error(e.getMessage(), e);
 308  68
     }
 309  68
   }
 310  
   /**
 311  
    * Helper method to index an individual blog entry.
 312  
    *
 313  
    * @param staticPage    the Page instance instance to index
 314  
    * @param writer      the IndexWriter to index with
 315  
    */
 316  
   private void index(StaticPage staticPage, IndexWriter writer) {
 317  
     try {
 318  0
       log.debug("Indexing " + staticPage.getTitle());
 319  0
       Document document = new Document();
 320  0
       document.add(Field.Keyword("id", staticPage.getId()));
 321  0
       if (staticPage.getTitle() != null) {
 322  0
         document.add(Field.Text("title", staticPage.getTitle()));
 323  
       } else {
 324  0
         document.add(Field.Text("title", ""));
 325  
       }
 326  0
       document.add(Field.Keyword("permalink", staticPage.getPermalink()));
 327  0
       document.add(Field.UnIndexed("date", DateField.dateToString(staticPage.getDate())));
 328  0
       if (staticPage.getBody() != null) {
 329  0
         document.add(Field.UnStored("body", staticPage.getBody()));
 330  
       } else {
 331  0
         document.add(Field.UnStored("body", ""));
 332  
       }
 333  0
       if (staticPage.getTruncatedContent() != null) {
 334  0
         document.add(Field.Text("truncatedBody", staticPage.getTruncatedContent()));
 335  
       } else {
 336  0
         document.add(Field.Text("truncatedBody", ""));
 337  
       }
 338  
 
 339  0
       if (staticPage.getAuthor() != null) {
 340  0
         document.add(Field.Text("author", staticPage.getAuthor()));
 341  
       }
 342  
 
 343  
       // build up one large string with all searchable content
 344  
       // i.e. entry title, entry body and all response bodies
 345  0
       StringBuffer searchableContent = new StringBuffer();
 346  0
       searchableContent.append(staticPage.getTitle());
 347  0
       searchableContent.append(" ");
 348  0
       searchableContent.append(staticPage.getBody());
 349  
 
 350  
       // join the title and body together to make searching on them both easier
 351  0
       document.add(Field.UnStored("blogEntry", searchableContent.toString()));
 352  
 
 353  0
       writer.addDocument(document);
 354  0
     } catch (Exception e) {
 355  0
       log.error(e.getMessage(), e);
 356  0
     }
 357  0
   }
 358  
 
 359  
   public SearchResults search(String queryString) throws SearchException {
 360  
 
 361  56
     log.debug("Performing search : " + queryString);
 362  
 
 363  56
     SearchResults searchResults = new SearchResults();
 364  56
     searchResults.setQuery(queryString);
 365  
 
 366  56
     if (queryString != null && queryString.length() > 0) {
 367  56
       Searcher searcher = null;
 368  
 
 369  
       try {
 370  56
         searcher = new IndexSearcher(blog.getSearchIndexDirectory());
 371  56
         Query query = QueryParser.parse(queryString, "blogEntry", getAnalyzer());
 372  56
         Hits hits = searcher.search(query);
 373  
 
 374  84
         for (int i = 0; i < hits.length(); i++) {
 375  28
           Document doc = hits.doc(i);
 376  28
           SearchHit result = new SearchHit(
 377  
               blog,
 378  
               doc.get("id"),
 379  
               doc.get("permalink"),
 380  
               doc.get("title"),
 381  
               doc.get("truncatedBody"),
 382  
               DateField.stringToDate(doc.get("date")),
 383  
               hits.score(i));
 384  28
           searchResults.add(result);
 385  
         }
 386  0
       } catch (ParseException pe) {
 387  0
         pe.printStackTrace();
 388  0
         searchResults.setMessage("Sorry, but there was an error. Please try another search");
 389  0
       } catch (Exception e) {
 390  0
         e.printStackTrace();
 391  0
         throw new SearchException(e.getMessage());
 392  
       } finally {
 393  56
         if (searcher != null) {
 394  
           try {
 395  56
             searcher.close();
 396  0
           } catch (IOException e) {
 397  
             // can't do much now! ;-)
 398  56
           }
 399  
         }
 400  
       }
 401  
     }
 402  
 
 403  56
     return searchResults;
 404  
   }
 405  
 
 406  
 }
 407