Changeset 5705


Ignore:
Timestamp:
2010-08-25 14:01:51 (3 years ago)
Author:
karel
Message:

Lucene analyzer plug & play...

  • Allow plugging in multiple Lucene analyzers (via the pluginRegistry)
  • Allow specifying the analyzers to be used for indexing via myconfig.xml
  • Allow specifying the default query analyzer via myconfig.xml
  • Allow specifying an alternative query analyzer via the query option ft_query_analyzer
Location:
trunk/daisy/repository/server/src/main/java/org/outerj/daisy
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/daisy/repository/server/src/main/java/org/outerj/daisy/ftindex/FullTextIndex.java

    r5620 r5705  
    2727     * This should be guaranteed to happen, i.o.w. use a try-finally block.</b></p> 
    2828     */ 
    29     Hits search(String query, long branchId, long languageId, Date date, boolean searchName, boolean searchContent, boolean searchFields) throws QueryException; 
     29    Hits search(String analyzer, String query, long branchId, long languageId, Date date, boolean searchName, boolean searchContent, boolean searchFields) throws QueryException; 
    3030 
    3131    /** 
  • trunk/daisy/repository/server/src/main/java/org/outerj/daisy/ftindex/FullTextIndexImpl.java

    r5620 r5705  
    2121import java.util.Arrays; 
    2222import java.util.Date; 
     23import java.util.HashMap; 
    2324import java.util.List; 
     25import java.util.Map; 
    2426import java.util.concurrent.TimeUnit; 
    2527import java.util.concurrent.locks.Lock; 
     
    6769public class FullTextIndexImpl implements FullTextIndex, FullTextIndexImplMBean, SuspendableProcess { 
    6870    private static final int PRECISION_STEP_LONG = 6; 
    69     private Analyzer luceneAnalyzer; 
    70     private File indexDirectoryFile; 
     71    private String indexAnalyzerName; 
     72    private Analyzer indexAnalyzer; 
     73    private Analyzer fallbackAnalyzer = new StandardAnalyzer(Version.LUCENE_30); 
     74    private String defaultQueryAnalyzerName; 
     75    private Map<String, Analyzer> analyzersByName = new HashMap<String, Analyzer>(); 
     76    private File indexDirectoryFile; 
    7177    private Directory indexDirectory; 
    7278    private IndexWriter indexWriter; 
     
    7783    private Thread indexFlushThread = null; 
    7884    private IndexOptimizeThread indexOptimizeThread = null; 
     85    private PluginUser<Analyzer> analyzerPluginUser = new AnalyzerPluginUser(); 
     86    private boolean initDone = false; 
    7987     
    8088    /** 
     
    115123    @PreDestroy 
    116124    public void destroy() throws Exception { 
     125        pluginRegistry.unsetPluginUser(Analyzer.class, analyzerPluginUser); 
     126 
    117127        this.stop(); 
    118128        this.dispose(); 
     
    127137            throw new ConfigurationException("The specified directory is not a directory: " + directoryName); 
    128138        log.debug("Using the following as directory to store indexes: " + indexDirectoryFile); 
     139         
     140        indexAnalyzerName = configuration.getChild("indexAnalyzer").getValue(null); 
     141        defaultQueryAnalyzerName = configuration.getChild("defaultQueryAnalyzer").getValue(null); 
    129142 
    130143        this.indexFlushInterval = configuration.getChild("indexFlushInterval").getValueAsInteger(indexFlushInterval); 
     
    146159        } 
    147160         
    148         // set the analyzer 
    149         PluginUser<Analyzer> analyzerPluginUser = new AnalyzerPluginUser(); 
     161        // register the plugin user 
    150162        pluginRegistry.setPluginUser(Analyzer.class, analyzerPluginUser); 
    151         pluginRegistry.unsetPluginUser(Analyzer.class, analyzerPluginUser); 
    152         if (luceneAnalyzer == null) { 
    153             luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_30); 
    154         } 
     163         
     164        // fetch the indexAnalyzer; 
     165        indexAnalyzer = getAnalyzer(indexAnalyzerName); 
    155166 
    156167        // Make the initial IndexWriter instance 
     
    163174 
    164175        pluginRegistry.addPlugin(SuspendableProcess.class, SUSPEND_PROCESS_NAME, this); 
     176        initDone = true; 
    165177    } 
    166178 
     
    232244    } 
    233245 
    234     public Hits search(String queryAsString, long branchId, long languageId, Date date, boolean searchName, boolean searchContent, boolean searchFields) throws QueryException { 
     246    public Hits search(String analyzerName, String queryAsString, long branchId, long languageId, Date date, boolean searchName, boolean searchContent, boolean searchFields) throws QueryException { 
     247        Analyzer analyzer; 
     248        if (analyzerName == null && defaultQueryAnalyzerName == null) { 
     249            analyzer = fallbackAnalyzer; 
     250        } else { 
     251            if (analyzerName == null) { 
     252                analyzerName = defaultQueryAnalyzerName; 
     253            } 
     254            if (!analyzersByName.containsKey(analyzerName)) { 
     255                throw new QueryException("No analyzer named " + analyzerName + " is registered"); 
     256            } 
     257            analyzer = getAnalyzer(analyzerName); 
     258        } 
    235259        BooleanQuery query = new BooleanQuery(); 
    236260        try { 
     
    244268 
    245269            String[] strings = new String[fieldstoSearch.size()]; 
    246             Arrays.fill(strings, queryAsString); 
    247             Query baseQuery = MultiFieldQueryParser.parse(Version.LUCENE_30, strings, fieldstoSearch.toArray(new String[fieldstoSearch.size()]), luceneAnalyzer); 
     270            Arrays.fill(strings, queryAsString);// analyzer = new org.apache.lucene.analysis.fr.FrenchAnalyzer(Version.LUCENE_30); 
     271            Query baseQuery = MultiFieldQueryParser.parse(Version.LUCENE_30, strings, fieldstoSearch.toArray(new String[fieldstoSearch.size()]), analyzer); 
    248272             
    249273            query.add(baseQuery, BooleanClause.Occur.MUST); 
     
    422446 
    423447    private IndexWriter constructIndexWriter() throws IOException { 
    424         return new IndexWriter(indexDirectory, luceneAnalyzer, MaxFieldLength.UNLIMITED); 
     448        return new IndexWriter(indexDirectory, indexAnalyzer, MaxFieldLength.UNLIMITED); 
    425449    } 
    426450 
     
    530554    } 
    531555 
     556    public Analyzer getAnalyzer(String name) { 
     557        Analyzer result = null;  
     558        if (name != null) { 
     559            result = analyzersByName.get(name); 
     560        } 
     561        if (result == null) { 
     562            return fallbackAnalyzer; 
     563        } 
     564        return result; 
     565    } 
     566     
    532567    private class AnalyzerPluginUser implements PluginUser<Analyzer> { 
    533         private boolean done = false; 
    534  
    535568        public void pluginAdded(PluginHandle<Analyzer> pluginHandle) { 
    536             if (!done) { // ignore all but the very first registered plugin 
    537                 done = true; 
    538                 FullTextIndexImpl.this.luceneAnalyzer = pluginHandle.getPlugin(); 
    539                 log.info("Using custom lucene analyzer registered with name " + pluginHandle.getName()); 
     569            synchronized (analyzersByName) { 
     570                if (analyzersByName.containsKey(pluginHandle.getName())) { 
     571                    log.warn(String.format("Analyzer with name %s already registered, not registering it again", pluginHandle.getName())); 
     572                } else { 
     573                    log.info("Registering Lucene Analyzer with name " + pluginHandle.getName()); 
     574                    analyzersByName.put(pluginHandle.getName(), pluginHandle.getPlugin()); 
     575                    if (pluginHandle.getName().equals(indexAnalyzerName) && !FullTextIndexImpl.this.initDone) { 
     576                        log.error("Refusing to set the indexAnalyzer after starting the repository to avoid indexing errors"); 
     577                    } 
     578                } 
    540579            } 
    541580        } 
    542581 
    543582        public void pluginRemoved(PluginHandle<Analyzer> pluginHandle) { 
    544             // ignored 
     583            analyzersByName.remove(pluginHandle.getName()); 
    545584        } 
    546585    } 
  • trunk/daisy/repository/server/src/main/java/org/outerj/daisy/query/model/Query.java

    r5624 r5705  
    6060    private boolean annotateLinkFields = true; 
    6161    private String styleHint; 
     62    private String analyzerName; 
    6263    private SqlGenerationContext sqlGenerationContext; 
    6364    private int chunkOffset = 1; 
     
    222223                throw new NumberFormatException("The following value could not be used as a chunk length : " + value + ". Chunk lengths must be numeric."); 
    223224            } 
     225        } else if (name.equalsIgnoreCase("ft_query_analyzer")) { 
     226          this.analyzerName = value;   
    224227        } else 
    225228            throw new RuntimeException("Unrecognized option: " + name); 
     
    280283        this.chunkOffset = chunkOffset; 
    281284    } 
     285     
     286    public String getAnalyzerName() { 
     287        return analyzerName; 
     288    } 
    282289 
    283290    /** 
  • trunk/daisy/repository/server/src/main/java/org/outerj/daisy/repository/serverimpl/query/LocalQueryManager.java

    r5621 r5705  
    491491                    pointInTime = query.getVersionMode().getDate(); 
    492492                } 
    493                 fullTextHits = fullTextIndex.search(ftQuery.getQuery(), ftQuery.getBranchId(), ftQuery.getLanguageId(), pointInTime,  
     493                fullTextHits = fullTextIndex.search(query.getAnalyzerName(), ftQuery.getQuery(), ftQuery.getBranchId(), ftQuery.getLanguageId(), pointInTime,  
    494494                        ftQuery.getSearchName(), ftQuery.getSearchContent(), ftQuery.getSearchFields()); 
    495495                evaluationInfo.setHits(fullTextHits); 
Note: See TracChangeset for help on using the changeset viewer.