Java 类org.apache.lucene.analysis.core.StopFilterFactory 实例源码

项目:taxonomy    文件:TrainingSetConfiguration.java   
/**
 * Builds the analyser used both to index elements into the training set and
 * to compare them with the document being categorised.
 * <p>
 * Each injected factory is forwarded only when its matching switch
 * ({@code useStopFilter} / {@code useSynonymFilter}) is on; otherwise
 * {@code null} is passed in its place.
 *
 * @param stopFilterFactory    stop filter factory, forwarded when {@code useStopFilter} is set
 * @param synonymFilterFactory synonym filter factory, forwarded when {@code useSynonymFilter} is set
 * @return a new {@link TaxonomyTrainingSetAnalyser}
 * @throws ParseException        declared by the signature; no statement visible here throws it
 * @throws NumberFormatException presumably when {@code maxShingleSize} cannot be converted by
 *                               {@link Integer#valueOf} (its declared type is not visible here)
 */
@ConditionalOnProperty(prefix = "lucene.categoriser.", value = "useTSetBasedCategoriser")
@Bean
public Analyzer trainingSetAnalyser(StopFilterFactory stopFilterFactory,
        SynonymFilterFactory synonymFilterFactory) throws NumberFormatException, ParseException {

    // A disabled filter is represented by a null factory.
    final StopFilterFactory effectiveStopFilter = useStopFilter ? stopFilterFactory : null;
    final SynonymFilterFactory effectiveSynonymFilter = useSynonymFilter ? synonymFilterFactory : null;
    final Integer shingleLimit = Integer.valueOf(maxShingleSize);

    return new TaxonomyTrainingSetAnalyser(effectiveStopFilter, effectiveSynonymFilter, shingleLimit);
}
项目:search    文件:SolrStopwordsCarrot2LexicalDataFactory.java   
/**
 * Looks up the stop words registered for a field, collecting them on demand
 * from the field's index analyzer.
 * <p>
 * When nothing is stored for the field yet and its index analyzer is a
 * {@link TokenizerChain}, the word sets of every {@link StopFilterFactory}
 * and {@link CommonGramsFilterFactory} in the chain are put into
 * {@code solrStopWords} under the field name.
 *
 * @param fieldName name of the schema field
 * @return whatever {@code solrStopWords} holds for {@code fieldName}
 */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // Not synchronized on purpose: Carrot2 guarantees that an instance of this
  // class is never used by several threads at once.
  if (solrStopWords.containsKey(fieldName)) {
    return solrStopWords.get(fieldName);
  }

  final Analyzer indexAnalyzer =
      core.getLatestSchema().getFieldType(fieldName).getIndexAnalyzer();
  if (indexAnalyzer instanceof TokenizerChain) {
    final TokenizerChain chain = (TokenizerChain) indexAnalyzer;
    for (TokenFilterFactory candidate : chain.getTokenFilterFactories()) {
      if (candidate instanceof StopFilterFactory) {
        // The stop filter factory exposes its words as a CharArraySet.
        final StopFilterFactory stopFactory = (StopFilterFactory) candidate;
        solrStopWords.put(fieldName, stopFactory.getStopWords());
      }

      if (candidate instanceof CommonGramsFilterFactory) {
        final CommonGramsFilterFactory gramsFactory = (CommonGramsFilterFactory) candidate;
        solrStopWords.put(fieldName, gramsFactory.getCommonWords());
      }
    }
  }

  return solrStopWords.get(fieldName);
}
项目:hapi-fhir    文件:LuceneSearchMappingFactory.java   
/**
 * Builds the programmatic {@link SearchMapping} and registers six named
 * analyzer definitions on it through one fluent call chain.
 *
 * @return the mapping instance the analyzer definitions were registered on
 */
@Factory
public SearchMapping getSearchMapping() {
    SearchMapping mapping = new SearchMapping();

    // "autocompleteEdgeAnalyzer": pattern tokenizer (pattern "(.*)", group "1"),
    // then lower-case, stop and edge n-gram filters; the two gram-size params
    // follow the edge n-gram filter in the chain (3 to 50).
    mapping.analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
            .tokenizerParam("pattern", "(.*)")
            .tokenizerParam("group", "1")
            .filter(LowerCaseFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(EdgeNGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "50")
        // "autocompletePhoneticAnalyzer": standard tokenizer, then standard, stop,
        // phonetic (encoder "DoubleMetaphone") and Snowball (language "English") filters.
        .analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
            .filter(StandardFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(PhoneticFilterFactory.class)
            .param("encoder", "DoubleMetaphone")
            .filter(SnowballPorterFilterFactory.class)
            .param("language", "English")
        // "autocompleteNGramAnalyzer": standard tokenizer, then word-delimiter,
        // lower-case and n-gram filters; gram-size params 3 to 20.
        .analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
            .filter(WordDelimiterFilterFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(NGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "20")
        // "standardAnalyzer": standard tokenizer plus a lower-case filter.
        .analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
        // "exactAnalyzer": standard tokenizer with no filters (so no lower-casing).
        .analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
        // "conceptParentPidsAnalyzer": whitespace tokenizer with no filters.
        .analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);

    return mapping;
}
项目:NYBC    文件:SolrStopwordsCarrot2LexicalDataFactory.java   
/**
 * Returns the stop words known for a field, gathering them from the field's
 * analyzer the first time they are requested.
 * <p>
 * If {@code solrStopWords} has no entry for the field, every
 * {@link StopFilterFactory} and {@link CommonGramsFilterFactory} found in the
 * analyzer's filter chain contributes its word set under the field name.
 * Analyzers that are not a {@link TokenizerChain} contribute nothing.
 *
 * @param fieldName name of the schema field
 * @return the content of {@code solrStopWords} for {@code fieldName}
 */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // Unsynchronized by design: Carrot2 makes sure an instance of this class
  // is only ever used by one thread at a time.
  final boolean alreadyCollected = solrStopWords.containsKey(fieldName);
  if (!alreadyCollected) {
    final Analyzer analyzer = schema.getFieldType(fieldName).getAnalyzer();

    // Only a TokenizerChain exposes its filter factories; anything else
    // is treated as having none.
    final TokenFilterFactory[] factories = (analyzer instanceof TokenizerChain)
        ? ((TokenizerChain) analyzer).getTokenFilterFactories()
        : new TokenFilterFactory[0];

    for (TokenFilterFactory each : factories) {
      if (each instanceof StopFilterFactory) {
        // Stop words are kept by the factory as a CharArraySet.
        final CharArraySet stopWords = ((StopFilterFactory) each).getStopWords();
        solrStopWords.put(fieldName, stopWords);
      }

      if (each instanceof CommonGramsFilterFactory) {
        final CharArraySet commonWords = ((CommonGramsFilterFactory) each).getCommonWords();
        solrStopWords.put(fieldName, commonWords);
      }
    }
  }
  return solrStopWords.get(fieldName);
}
项目:taxonomy    文件:TaxonomyTrainingSetAnalyser.java   
/**
 * Creates the training-set analyser, storing each argument in the field of
 * the same name.
 *
 * @param stopFilterFactory    stop filter factory to keep (stored as given, no null check)
 * @param synonymFilterFactory synonym filter factory to keep (stored as given, no null check)
 * @param maxShingleSize       maximum shingle size to keep
 */
public TaxonomyTrainingSetAnalyser(StopFilterFactory stopFilterFactory, SynonymFilterFactory synonymFilterFactory,
        Integer maxShingleSize) {
    this.maxShingleSize = maxShingleSize;
    this.synonymFilterFactory = synonymFilterFactory;
    this.stopFilterFactory = stopFilterFactory;
}
项目:taxonomy    文件:IAViewTextCasPuncAnalyser.java   
/**
 * Creates the analyser, storing each argument in the field of the same name.
 *
 * @param stopFilterFactory    stop filter factory to keep (stored as given, no null check)
 * @param synonymFilterFactory synonym filter factory to keep (stored as given, no null check)
 * @param analyzerType         analyzer type to keep
 */
public IAViewTextCasPuncAnalyser(StopFilterFactory stopFilterFactory, SynonymFilterFactory synonymFilterFactory,
        AnalyzerType analyzerType) {
    this.analyzerType = analyzerType;
    this.synonymFilterFactory = synonymFilterFactory;
    this.stopFilterFactory = stopFilterFactory;
}
项目:read-open-source-code    文件:SolrStopwordsCarrot2LexicalDataFactory.java   
/**
 * Fetches the stop words recorded for a field, filling {@code solrStopWords}
 * from the field's analyzer when no entry exists yet.
 * <p>
 * Word sets come from every {@link StopFilterFactory} and
 * {@link CommonGramsFilterFactory} in the analyzer's filter chain; the
 * analyzer is inspected only when it is a {@link TokenizerChain}.
 *
 * @param fieldName name of the schema field
 * @return the content of {@code solrStopWords} for {@code fieldName}
 */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // Synchronization is unnecessary: Carrot2 never shares an instance of
  // this class between threads.
  if (!solrStopWords.containsKey(fieldName)) {
    final Analyzer analyzerOfField =
        core.getLatestSchema().getFieldType(fieldName).getAnalyzer();

    if (analyzerOfField instanceof TokenizerChain) {
      final TokenizerChain tokenizerChain = (TokenizerChain) analyzerOfField;
      final TokenFilterFactory[] factories = tokenizerChain.getTokenFilterFactories();

      for (int i = 0; i < factories.length; i++) {
        final TokenFilterFactory current = factories[i];

        if (current instanceof StopFilterFactory) {
          // The factory keeps its stop words in a CharArraySet.
          solrStopWords.put(fieldName, ((StopFilterFactory) current).getStopWords());
        }

        if (current instanceof CommonGramsFilterFactory) {
          solrStopWords.put(fieldName, ((CommonGramsFilterFactory) current).getCommonWords());
        }
      }
    }
  }

  return solrStopWords.get(fieldName);
}
项目:Xponents    文件:LuceneStopwords.java   
/**
 * Simple wrapper around Lucene resource loading to access Solr-provided stop lists.
 * <p>
 * The language ID is lower-cased with {@link java.util.Locale#ROOT} so that the
 * resulting resource path and the {@code SNOWBALL_SETS} lookup do not depend on
 * the JVM default locale (e.g. under a Turkish locale {@code "IT"} would
 * otherwise become {@code "ıt"}).
 *
 * @param loader classpath loader
 * @param givenLang ISO 2-char language ID used by lucene for lang-specific filters (./lang);
 *                  must not be {@code null}
 * @return the stop word set exposed by the configured {@link StopFilterFactory}
 * @throws IOException if {@code StopFilterFactory.inform(loader)} fails
 */
public static Set<Object> getStopwords(ResourceLoader loader, String givenLang) throws IOException {
    // Locale.ROOT: language IDs are ASCII identifiers, never locale-sensitive text.
    final String lang = givenLang.toLowerCase(java.util.Locale.ROOT);

    final HashMap<String, String> configurationArgs = new HashMap<>();
    configurationArgs.put("words", defaultPath(lang));
    // Languages listed in SNOWBALL_SETS use the "snowball" file format, all others "wordset".
    configurationArgs.put("format", SNOWBALL_SETS.contains(lang) ? "snowball" : "wordset");
    configurationArgs.put("luceneMatchVersion", "6.6");

    final StopFilterFactory filter = new StopFilterFactory(configurationArgs);
    // inform() is handed the loader before the words are read back
    // (presumably this is where the configured word file is loaded).
    filter.inform(loader);

    return filter.getStopWords();
}
项目:SolrTextTagger    文件:TaggerRequestHandler.java   
/**
 * Tells whether the index-time analyzer of a field contains a
 * {@link StopFilterFactory}.
 *
 * @param field name of the schema field to inspect
 * @param req   request whose schema is consulted
 * @return {@code true} if the field's index analyzer is a {@link TokenizerChain}
 *         with at least one {@link StopFilterFactory}; {@code false} otherwise
 */
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  // Only the index analyzer is inspected here.
  final Analyzer indexAnalyzer = req.getSchema().getFieldType(field).getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return false;
  }

  for (TokenFilterFactory candidate : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (candidate instanceof StopFilterFactory) {
      return true;
    }
  }
  return false;
}