Example source code for the Java class org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory

Project: hapi-fhir    File: LuceneSearchMappingFactory.java
@Factory
public SearchMapping getSearchMapping() {
    SearchMapping mapping = new SearchMapping();

    mapping.analyzerDef("autocompleteEdgeAnalyzer", PatternTokenizerFactory.class)
            .tokenizerParam("pattern", "(.*)")
            .tokenizerParam("group", "1")
            .filter(LowerCaseFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(EdgeNGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "50")
        .analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
            .filter(StandardFilterFactory.class)
            .filter(StopFilterFactory.class)
            .filter(PhoneticFilterFactory.class)
            .param("encoder", "DoubleMetaphone")
            .filter(SnowballPorterFilterFactory.class)
            .param("language", "English")
        .analyzerDef("autocompleteNGramAnalyzer", StandardTokenizerFactory.class)
            .filter(WordDelimiterFilterFactory.class)
            .filter(LowerCaseFilterFactory.class)
            .filter(NGramFilterFactory.class)
            .param("minGramSize", "3")
            .param("maxGramSize", "20")
        .analyzerDef("standardAnalyzer", StandardTokenizerFactory.class)
            .filter(LowerCaseFilterFactory.class)
        .analyzerDef("exactAnalyzer", StandardTokenizerFactory.class)
        .analyzerDef("conceptParentPidsAnalyzer", WhitespaceTokenizerFactory.class);

    return mapping;
}
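For context on what the chain above produces, an analyzer of the same shape can also be assembled directly through Lucene's CustomAnalyzer builder (on recent Lucene versions, WordDelimiterGraphFilterFactory is the preferred replacement for the deprecated WordDelimiterFilterFactory). The following is a minimal sketch, not hapi-fhir code; the parameter values are illustrative:

// Minimal sketch (not from hapi-fhir): a comparable chain built with
// Lucene's CustomAnalyzer. Parameter values here are illustrative.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;

public class WordDelimiterChainSketch {
    public static Analyzer build() throws Exception {
        return CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(WordDelimiterFilterFactory.class,
                        "generateWordParts", "1",
                        "splitOnCaseChange", "1",
                        "preserveOriginal", "1")
                .addTokenFilter(LowerCaseFilterFactory.class)
                .build();
    }
}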
Project: taxonomy    File: IAViewTextGenAnalyser.java
/**
 * Creates a new analyser.
 */
public IAViewTextGenAnalyser(SynonymFilterFactory synonymFilterFactory,
                             WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
Project: taxonomy    File: IAViewTextCasNoPuncAnalyser.java
/**
 * Creates a new analyser.
 */
public IAViewTextCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
                                   WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
Project: taxonomy    File: IAViewTextNoCasNoPuncAnalyser.java
/**
 * Creates a new analyser.
 */
public IAViewTextNoCasNoPuncAnalyser(SynonymFilterFactory synonymFilterFactory,
                                     WordDelimiterFilterFactory wordDelimiterFilterFactory, AnalyzerType analyzerType) {
    this.synonymFilterFactory = synonymFilterFactory;
    this.wordDelimiterFilterFactory = wordDelimiterFilterFactory;
    this.analyzerType = analyzerType;
}
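These constructors receive pre-built factories. A sketch of how the injected WordDelimiterFilterFactory might itself be constructed follows; the parameter values are assumptions, not taken from the taxonomy project:

// Sketch (assumed settings, not taxonomy code): since Lucene 4.x,
// analysis factories take their configuration as a Map passed to the
// constructor. Needs java.util.HashMap/Map, org.apache.lucene.util.Version
// and the factory import.
Map<String, String> wdfArgs = new HashMap<>();
wdfArgs.put("luceneMatchVersion", Version.LATEST.toString());
wdfArgs.put("generateWordParts", "1");   // assumed setting
wdfArgs.put("preserveOriginal", "1");    // assumed setting
WordDelimiterFilterFactory wordDelimiterFilterFactory =
        new WordDelimiterFilterFactory(wdfArgs);
// ... then pass it to one of the constructors above, e.g.
// new IAViewTextGenAnalyser(synonymFilterFactory, wordDelimiterFilterFactory, analyzerType);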
Project: owsi-core-parent    File: CoreLuceneAnalyzersDefinitionProvider.java
protected void registerWithPrefix(String prefix, LuceneAnalyzerDefinitionRegistryBuilder builder) {
    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD).tokenizer(KeywordTokenizerFactory.class);

    builder.analyzer(prefix + HibernateSearchAnalyzer.KEYWORD_CLEAN).tokenizer(KeywordTokenizerFactory.class)
        .tokenFilter(ASCIIFoldingFilterFactory.class)
        .tokenFilter(LowerCaseFilterFactory.class);

    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class);

    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_STEMMING).tokenizer(WhitespaceTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(WordDelimiterFilterFactory.class)
                    .param("generateWordParts", "1")
                    .param("generateNumberParts", "1")
                    .param("catenateWords", "0")
                    .param("catenateNumbers", "0")
                    .param("catenateAll", "0")
                    .param("splitOnCaseChange", "0")
                    .param("splitOnNumerics", "0")
                    .param("preserveOriginal", "1")
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(CoreFrenchMinimalStemFilterFactory.class);

    builder.analyzer(prefix + HibernateSearchAnalyzer.TEXT_SORT).tokenizer(KeywordTokenizerFactory.class)
            .tokenFilter(ASCIIFoldingFilterFactory.class)
            .tokenFilter(LowerCaseFilterFactory.class)
            .tokenFilter(PatternReplaceFilterFactory.class)
                    .param("pattern", "('-&\\.,\\(\\))")
                    .param("replacement", " ")
                    .param("replace", "all")
            .tokenFilter(PatternReplaceFilterFactory.class)
                    .param("pattern", "([^0-9\\p{L} ])")
                    .param("replacement", "")
                    .param("replace", "all")
            .tokenFilter(TrimFilterFactory.class);

}
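A quick way to see the effect of preserveOriginal and the other WordDelimiterFilterFactory parameters is to run text through one of these analyzers and print the resulting terms. A minimal sketch, not owsi-core code (the field name is arbitrary); with the TEXT chain above, "Wi-Fi" should come out as wi-fi, wi and fi after lowercasing:

// Sketch: dump the terms an Analyzer produces for a given input.
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PrintTokens {
    public static void print(Analyzer analyzer, String text) throws IOException {
        try (TokenStream ts = analyzer.tokenStream("field", text)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                       // mandatory before consuming
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end();                         // finalize offsets
        }
    }
}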
Project: search    File: TestWordDelimiterFilterFactory.java
@Test
public void testCustomTypes() throws Exception {
  String testText = "I borrowed $5,400.00 at 25% interest-rate";
  ResourceLoader loader = new SolrResourceLoader("solr/collection1");
  Map<String,String> args = new HashMap<>();
  args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
  args.put("generateWordParts", "1");
  args.put("generateNumberParts", "1");
  args.put("catenateWords", "1");
  args.put("catenateNumbers", "1");
  args.put("catenateAll", "0");
  args.put("splitOnCaseChange", "1");

  /* default behavior */
  WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory(args);
  factoryDefault.inform(loader);

  TokenStream ts = factoryDefault.create(
      new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "I", "borrowed", "5", "540000", "400", "00", "at", "25", "interest", "interestrate", "rate" });

  ts = factoryDefault.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "foo", "foobar", "bar" });


  /* custom behavior */
  args = new HashMap<>();
  // use a custom type mapping
  args.put("luceneMatchVersion", TEST_VERSION_CURRENT.toString());
  args.put("generateWordParts", "1");
  args.put("generateNumberParts", "1");
  args.put("catenateWords", "1");
  args.put("catenateNumbers", "1");
  args.put("catenateAll", "0");
  args.put("splitOnCaseChange", "1");
  args.put("types", "wdftypes.txt");
  WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory(args);
  factoryCustom.inform(loader);

  ts = factoryCustom.create(
      new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "interestrate", "rate" });

  /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
  ts = factoryCustom.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "foo\u200Dbar" });
}
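The custom behaviour asserted above is driven by the file named in the "types" argument, which remaps character classes before the filter decides where to split. The real wdftypes.txt lives in the test resources; a reconstruction consistent with the assertions would look roughly like this (the allowable types are LOWER, UPPER, ALPHA, DIGIT, ALPHANUM and SUBWORD_DELIM):

# wdftypes.txt (sketch, reconstructed): treating currency, percent and
# number punctuation as DIGIT keeps "$5,400.00" and "25%" intact; mapping
# the zero-width joiner (U+200D) to ALPHANUM keeps "foo\u200Dbar" whole.
$ => DIGIT
% => DIGIT
. => DIGIT
, => DIGIT
\u200D => ALPHANUM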
Project: NYBC    File: TestWordDelimiterFilterFactory.java
@Test
public void testCustomTypes() throws Exception {
  String testText = "I borrowed $5,400.00 at 25% interest-rate";
  WordDelimiterFilterFactory factoryDefault = new WordDelimiterFilterFactory();
  ResourceLoader loader = new SolrResourceLoader("solr/collection1");
  Map<String,String> args = new HashMap<String,String>();
  args.put("generateWordParts", "1");
  args.put("generateNumberParts", "1");
  args.put("catenateWords", "1");
  args.put("catenateNumbers", "1");
  args.put("catenateAll", "0");
  args.put("splitOnCaseChange", "1");

  /* default behavior */
  factoryDefault.init(args);
  factoryDefault.inform(loader);

  TokenStream ts = factoryDefault.create(
      new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "I", "borrowed", "5", "400", "00", "540000", "at", "25", "interest", "rate", "interestrate" });

  ts = factoryDefault.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "foo", "bar", "foobar" });


  /* custom behavior */
  WordDelimiterFilterFactory factoryCustom = new WordDelimiterFilterFactory();
  // use a custom type mapping
  args.put("types", "wdftypes.txt");
  factoryCustom.init(args);
  factoryCustom.inform(loader);

  ts = factoryCustom.create(
      new MockTokenizer(new StringReader(testText), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "I", "borrowed", "$5,400.00", "at", "25%", "interest", "rate", "interestrate" });

  /* test custom behavior with a char > 0x7F, because we had to make a larger byte[] */
  ts = factoryCustom.create(
      new MockTokenizer(new StringReader("foo\u200Dbar"), MockTokenizer.WHITESPACE, false));
  BaseTokenStreamTestCase.assertTokenStreamContents(ts, 
      new String[] { "foo\u200Dbar" });
}
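Note the API difference between this older example and the search variant above: here the factory is configured in two steps, init(Map) followed by inform(ResourceLoader), whereas the newer API passes the argument map, including luceneMatchVersion, straight to the WordDelimiterFilterFactory constructor. The expected token order also differs slightly between versions: the catenated terms ("540000", "interestrate") are asserted at different positions in each test.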