Java 类org.apache.lucene.analysis.util.TokenFilterFactory 实例源码

项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testIgnoreWhitespace() throws Exception {
  // With alternate=shifted and variableTop=" ", the space character and
  // everything sorting below it become ignorable, so "foo bar" collates
  // the same as "foobar".
  String spaced = "foo bar";
  String joined = "foobar";
  String hyphenated = "foo-bar";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "alternate", "shifted",
      "variableTop", " ");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(spaced))),
      factory.create(new KeywordTokenizer(new StringReader(joined))));
  // Punctuation above the variable top still matters: foo-bar < foo bar.
  assertCollation(
      factory.create(new KeywordTokenizer(new StringReader(hyphenated))),
      factory.create(new KeywordTokenizer(new StringReader(spaced))),
      -1);
}
项目:search    文件:TestFactories.java   
private void doTestTokenFilter(String tokenfilter) throws IOException {
  // Resolve the factory class via SPI, then try to build a fully
  // initialized instance; null means construction was not possible here.
  Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(clazz);
  if (factory == null) {
    return; // could not fully create an instance; nothing more to check
  }
  // A multi-term component must itself be a token filter factory —
  // returning a char filter or tokenizer here would make no sense.
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm =
        ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }
  // Beast it just a little; it shouldn't throw (initialize() already
  // surfaced construction-time failures).
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:solr-jdbc    文件:SearcherAwareReloaderTest.java   
@Before
public void setUp() {
   fieldType = new TextField();

   // Schema mock exposes a single field type under the name "test".
   Map<String, FieldType> typesByName = Maps.newHashMap();
   typesByName.put("test", fieldType);
   when(searcher.getSchema()).thenReturn(schema);
   when(schema.getFieldTypes()).thenReturn(typesByName);

   // Index and query analyzers: whitespace tokenizer plus one filter each.
   TokenFilterFactory[] indexFilters = { indexTokenFilterFactory };
   indexAnalyzer = new TokenizerChain(
         new WhitespaceTokenizerFactory(Maps.<String, String> newHashMap()), indexFilters);
   TokenFilterFactory[] queryFilters = { queryTokenFilterFactory };
   queryAnalyzer = new TokenizerChain(
         new WhitespaceTokenizerFactory(Maps.<String, String> newHashMap()), queryFilters);

   reloader = new SearcherAwareReloader(null);
}
项目:NYBC    文件:TestFactories.java   
private void doTestTokenFilter(String tokenfilter) throws IOException {
  // Look the factory up by SPI name and attempt full initialization.
  TokenFilterFactory factory = TokenFilterFactory.forName(tokenfilter);
  if (!initialize(factory)) {
    return; // could not fully initialize; nothing more to verify
  }
  // Sanity-check the MultiTermAware contract: the multi-term component
  // must itself be a token filter factory, never a char filter/tokenizer.
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm =
        ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }
  // Light random beasting; real failures would have surfaced in initialize().
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestICUCollationKeyFilterFactory.java   
public void testIgnoreWhitespace() throws Exception {
  // Shifted alternate handling with variableTop=" " makes whitespace
  // ignorable at primary strength: "foo bar" == "foobar".
  String spaced = "foo bar";
  String joined = "foobar";
  String hyphenated = "foo-bar";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "alternate", "shifted",
      "variableTop", " ");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(spaced))),
      factory.create(new KeywordTokenizer(new StringReader(joined))));
  // But punctuation still matters: foo-bar < foo bar.
  assertCollation(
      factory.create(new KeywordTokenizer(new StringReader(hyphenated))),
      factory.create(new KeywordTokenizer(new StringReader(spaced))),
      -1);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestFactories.java   
private void doTestTokenFilter(String tokenfilter) throws IOException {
  // SPI class lookup followed by an attempt to fully construct the factory.
  Class<? extends TokenFilterFactory> clazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(clazz);
  if (factory == null) {
    return; // instantiation failed; skip the remaining checks
  }
  // MultiTermAware implementations must hand back a TokenFilterFactory;
  // anything else (char filter, tokenizer) makes no sense here.
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory multiTerm =
        ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(multiTerm);
    assertTrue(multiTerm instanceof TokenFilterFactory);
  }
  // Brief random-data pass; exceptions should have shown up in initialize().
  checkRandomData(random(), new FactoryAnalyzer(assertingTokenizer, factory, null), 100, 20, false, false);
}
项目:elasticsearch_my    文件:PluginsService.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 *
 * @param loader classloader whose classpath is rescanned for Lucene SPI
 *               registrations (codecs, doc-values/postings formats, and
 *               analysis factories)
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
项目:Elasticsearch    文件:PluginsService.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 *
 * @param loader classloader whose classpath is rescanned for Lucene SPI
 *               registrations (codecs, doc-values/postings formats, and
 *               analysis factories)
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
项目:search    文件:AnalyzerFactoryTask.java   
/**
 * Looks up a class by fully qualified name (FQN), by FQN with the
 * "org.apache.lucene.analysis." package prefix prepended, or — when the
 * name contains no period — via the analysis SPI *Factory.lookupClass()
 * methods (e.g. "standard.ClassicTokenizerFactory" resolves to
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    // Dotted name: first try it verbatim as an FQN.
    try {
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException ignored) {
      // fall through and retry under the Lucene analysis package prefix
    }
    try {
      return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      throw new ClassNotFoundException("Can't find class '" + className
                                       + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
    }
  }
  // No dot - use analysis SPI lookup, stripping any *Factory-style suffix.
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:search    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // A tokenizer is mandatory; the filter lists may be empty.
  assert tokenizerFactory != null;
  this.charFilterFactories = charFilterFactories;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:search    文件:AnalyzerFactory.java   
@Override
public String toString() {
  // Renders as: AnalyzerFactory(name:…, positionIncrementGap:…, offsetGap:…,
  // charFilters…, tokenizer, tokenFilters…) — optional parts omitted if null.
  StringBuilder out = new StringBuilder("AnalyzerFactory(");
  if (name != null) {
    out.append("name:").append(name).append(", ");
  }
  if (positionIncrementGap != null) {
    out.append("positionIncrementGap:").append(positionIncrementGap).append(", ");
  }
  if (offsetGap != null) {
    out.append("offsetGap:").append(offsetGap).append(", ");
  }
  for (CharFilterFactory cff : charFilterFactories) {
    out.append(cff).append(", ");
  }
  out.append(tokenizerFactory);
  for (TokenFilterFactory tff : tokenFilterFactories) {
    out.append(", ").append(tff);
  }
  return out.append(')').toString();
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testBasicUsage() throws Exception {
  // Primary strength under the Turkish locale ignores case, including the
  // dotted/dotless-i distinction, so both phrases produce the same key.
  String upper = "I WİLL USE TURKİSH CASING";
  String lower = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "tr",
      "strength", "primary");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(upper))),
      factory.create(new KeywordTokenizer(new StringReader(lower))));
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testNormalization() throws Exception {
  // "I\u0307" (I + combining dot above) must normalize canonically before
  // collation so it matches the precomposed dotted form.
  String upper = "I W\u0049\u0307LL USE TURKİSH CASING";
  String lower = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "tr",
      "strength", "primary",
      "decomposition", "canonical");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(upper))),
      factory.create(new KeywordTokenizer(new StringReader(lower))));
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testSecondaryStrength() throws Exception {
  // Secondary strength ignores case differences, so TESTING == testing.
  String upper = "TESTING";
  String lower = "testing";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "secondary",
      "decomposition", "no");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(upper))),
      factory.create(new KeywordTokenizer(new StringReader(lower))));
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testIgnorePunctuation() throws Exception {
  // With alternate=shifted, punctuation is ignorable at primary strength,
  // so "foo-bar" and "foo bar" collate the same.
  String hyphenated = "foo-bar";
  String spaced = "foo bar";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "alternate", "shifted");
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(hyphenated))),
      factory.create(new KeywordTokenizer(new StringReader(spaced))));
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testNumerics() throws Exception {
  // Numeric collation sorts embedded digit runs by value: 9 < 10,
  // even though lexicographically "10" would come first.
  String nine = "foobar-9";
  String ten = "foobar-10";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "numeric", "true");
  assertCollation(
      factory.create(new KeywordTokenizer(new StringReader(nine))),
      factory.create(new KeywordTokenizer(new StringReader(ten))),
      -1);
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testIgnoreAccentsButNotCase() throws Exception {
  // caseLevel=true at primary strength: accents are ignored but a case
  // level is inserted, so case alone still distinguishes terms.
  String accented = "résumé";
  String plain = "resume";
  String accentedUpper = "Résumé";
  String plainUpper = "Resume";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "caseLevel", "true");

  // Accents are ignorable at both casings.
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(accented))),
      factory.create(new KeywordTokenizer(new StringReader(plain))));
  assertCollatesToSame(
      factory.create(new KeywordTokenizer(new StringReader(accentedUpper))),
      factory.create(new KeywordTokenizer(new StringReader(plainUpper))));

  // Case still matters: resume < Resume.
  assertCollation(
      factory.create(new KeywordTokenizer(new StringReader(plain))),
      factory.create(new KeywordTokenizer(new StringReader(plainUpper))),
      -1);
}
项目:search    文件:TestICUCollationKeyFilterFactory.java   
public void testUpperCaseFirst() throws Exception {
  // caseFirst=upper inverts the default tertiary ordering so that
  // "Resume" sorts before "resume".
  String lower = "resume";
  String upper = "Resume";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "tertiary",
      "caseFirst", "upper");
  assertCollation(
      factory.create(new KeywordTokenizer(new StringReader(upper))),
      factory.create(new KeywordTokenizer(new StringReader(lower))),
      -1);
}
项目:search    文件:TestSynonymFilterFactory.java   
/** checks for synonyms of "GB" in synonyms.txt */
private void checkSolrSynonyms(TokenFilterFactory factory) throws Exception {
  Reader input = new StringReader("GB");
  // The factory must wrap the tokenizer in a SynonymFilter that expands
  // "GB" into its synonyms at the same position (posInc 0).
  TokenStream stream = factory.create(
      new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
  assertTrue(stream instanceof SynonymFilter);
  assertTokenStreamContents(stream,
      new String[] { "GB", "gib", "gigabyte", "gigabytes" },
      new int[] { 1, 0, 0, 0 });
}
项目:search    文件:TestSynonymFilterFactory.java   
/** checks for synonyms of "second" in synonyms-wordnet.txt */
private void checkWordnetSynonyms(TokenFilterFactory factory) throws Exception {
  Reader input = new StringReader("second");
  // Wordnet-format synonyms should likewise be stacked at position 0.
  TokenStream stream = factory.create(
      new MockTokenizer(input, MockTokenizer.WHITESPACE, false));
  assertTrue(stream instanceof SynonymFilter);
  assertTokenStreamContents(stream,
      new String[] { "second", "2nd", "two" },
      new int[] { 1, 0, 0 });
}
项目:search    文件:TestSynonymFilterFactory.java   
/** Asserts that the factory is a SynonymFilterFactory delegating to the given class. */
private static void assertDelegator(final TokenFilterFactory factory,
                                    final Class delegatorClass) {
  assertNotNull(factory);
  assertTrue("factory not expected class: " + factory.getClass(),
             factory instanceof SynonymFilterFactory);
  final Object delegator = ((SynonymFilterFactory) factory).getDelegator();
  assertNotNull(delegator);
  assertTrue("delegator not expected class: " + delegator.getClass(),
             delegatorClass.isInstance(delegator));
}
项目:search    文件:TestFactories.java   
public void test() throws IOException {
  // Smoke-test every analysis component registered via SPI:
  // tokenizers, then token filters, then char filters.
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }
  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }
  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}
项目:search    文件:TestTypeTokenFilterFactory.java   
public void testCreationWithBlackList() throws Exception {
  // Verifies the Type filter can be built from two stop-type files and
  // wrap a stream without throwing (default blacklist mode).
  TokenFilterFactory typeFactory = tokenFilterFactory("Type",
      "types", "stoptypes-1.txt, stoptypes-2.txt",
      "enablePositionIncrements", "true");
  NumericTokenStream stream = new NumericTokenStream();
  stream.setIntValue(123);
  typeFactory.create(stream);
}
项目:search    文件:TestTypeTokenFilterFactory.java   
public void testCreationWithWhiteList() throws Exception {
  // Same as the blacklist case but with useWhitelist=true: construction
  // and wrapping must succeed without throwing.
  TokenFilterFactory typeFactory = tokenFilterFactory("Type",
      "types", "stoptypes-1.txt, stoptypes-2.txt",
      "enablePositionIncrements", "true",
      "useWhitelist", "true");
  NumericTokenStream stream = new NumericTokenStream();
  stream.setIntValue(123);
  typeFactory.create(stream);
}
项目:search    文件:TestCollationKeyFilterFactory.java   
public void testBasicUsage() throws Exception {
  // JDK CollationKey at primary strength for Turkish treats the dotted
  // and dotless i casings as equivalent.
  String upper = "I WİLL USE TURKİSH CASING";
  String lower = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("CollationKey",
      "language", "tr",
      "strength", "primary");
  assertCollatesToSame(
      factory.create(new MockTokenizer(new StringReader(upper), MockTokenizer.KEYWORD, false)),
      factory.create(new MockTokenizer(new StringReader(lower), MockTokenizer.KEYWORD, false)));
}
项目:search    文件:TestCollationKeyFilterFactory.java   
public void testNormalization() throws Exception {
  // Canonical decomposition folds "I\u0307" (I + combining dot above)
  // into the same form as the precomposed dotted capital I.
  String upper = "I W\u0049\u0307LL USE TURKİSH CASING";
  String lower = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("CollationKey",
      "language", "tr",
      "strength", "primary",
      "decomposition", "canonical");
  assertCollatesToSame(
      factory.create(new MockTokenizer(new StringReader(upper), MockTokenizer.KEYWORD, false)),
      factory.create(new MockTokenizer(new StringReader(lower), MockTokenizer.KEYWORD, false)));
}
项目:search    文件:TestCollationKeyFilterFactory.java   
public void testFullDecomposition() throws Exception {
  // Full decomposition maps full-width Latin forms onto their half-width
  // equivalents, so the two spellings collate identically.
  String fullWidth = "Testing";
  String halfWidth = "Testing";
  TokenFilterFactory factory = tokenFilterFactory("CollationKey",
      "language", "zh",
      "strength", "identical",
      "decomposition", "full");
  assertCollatesToSame(
      factory.create(new MockTokenizer(new StringReader(fullWidth), MockTokenizer.KEYWORD, false)),
      factory.create(new MockTokenizer(new StringReader(halfWidth), MockTokenizer.KEYWORD, false)));
}
项目:search    文件:TestCollationKeyFilterFactory.java   
public void testSecondaryStrength() throws Exception {
  // Secondary strength is case-insensitive: TESTING == testing.
  String upper = "TESTING";
  String lower = "testing";
  TokenFilterFactory factory = tokenFilterFactory("CollationKey",
      "language", "en",
      "strength", "secondary",
      "decomposition", "no");
  assertCollatesToSame(
      factory.create(new MockTokenizer(new StringReader(upper), MockTokenizer.KEYWORD, false)),
      factory.create(new MockTokenizer(new StringReader(lower), MockTokenizer.KEYWORD, false)));
}
项目:search    文件:SolrStopwordsCarrot2LexicalDataFactory.java   
/**
 * Obtains stop words for a field from the associated
 * {@link StopFilterFactory}, if any.
 * <p>
 * The result is cached in {@code solrStopWords} keyed by field name, so
 * the schema's index analyzer chain is only inspected on the first request
 * for a given field. Fields whose analyzer is not a {@code TokenizerChain},
 * or whose chain contains neither a stop filter nor a common-grams filter,
 * leave no cache entry.
 */
private Collection<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // No need to synchronize here, Carrot2 ensures that instances
  // of this class are not used by multiple threads at a time.
  if (!solrStopWords.containsKey(fieldName)) {
    final Analyzer fieldAnalyzer = core.getLatestSchema().getFieldType(fieldName)
        .getIndexAnalyzer();
    if (fieldAnalyzer instanceof TokenizerChain) {
      final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer)
          .getTokenFilterFactories();
      for (TokenFilterFactory factory : filterFactories) {
        if (factory instanceof StopFilterFactory) {
          // StopFilterFactory holds the stop words in a CharArraySet
          solrStopWords.put(fieldName,
              ((StopFilterFactory) factory).getStopWords());
        }

        // NOTE(review): if solrStopWords is a plain Map and the chain has
        // both a stop filter and a common-grams filter, the later put()
        // replaces the earlier entry — verify whether this is intended
        // (a multimap would instead accumulate both sets).
        if (factory instanceof CommonGramsFilterFactory) {
          solrStopWords.put(fieldName,
              ((CommonGramsFilterFactory) factory)
                  .getCommonWords());
        }
      }
    }
  }
  return solrStopWords.get(fieldName);
}
项目:search    文件:TokenizerChain.java   
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
  // Build the analysis chain: the tokenizer is the source, and each
  // filter factory wraps the stream produced by the previous stage.
  Tokenizer source = tokenizer.create(aReader);
  TokenStream sink = source;
  for (TokenFilterFactory filterFactory : filters) {
    sink = filterFactory.create(sink);
  }
  return new TokenStreamComponents(source, sink);
}
项目:search    文件:SolrResourceLoader.java   
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after {@link #addToClassLoader(String, FileFilter, boolean)}
 * and {@link #addToClassLoader(String,FileFilter,boolean)} before using
 * this ResourceLoader.
 */
void reloadLuceneSPI() {
  // Order matters: codecs are reloaded before the analysis factories.
  // Codecs:
  PostingsFormat.reloadPostingsFormats(this.classLoader);
  DocValuesFormat.reloadDocValuesFormats(this.classLoader);
  Codec.reloadCodecs(this.classLoader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(this.classLoader);
  TokenFilterFactory.reloadTokenFilters(this.classLoader);
  TokenizerFactory.reloadTokenizers(this.classLoader);
}
项目:search    文件:MultiTermTest.java   
@Test
public void testQueryCopiedToMulti() {
  // The multi-term analyzer derived from content_charfilter must be a
  // keyword-tokenized chain of lowercase filters with one mapping char filter.
  SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain chain = (TokenizerChain) analyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    assertTrue(filterFactory instanceof LowerCaseFilterFactory);
  }
  assertTrue(chain.getCharFilterFactories().length == 1);
  assertTrue(chain.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
项目:search    文件:MultiTermTest.java   
@Test
public void testDefaultCopiedToMulti() {
  // The default multi-term analyzer for content_ws: keyword tokenizer,
  // only ASCII-folding/lowercase filters, and no char filters at all.
  SchemaField field = h.getCore().getLatestSchema().getField("content_ws");
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  TokenizerChain chain = (TokenizerChain) analyzer;
  assertTrue(chain.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    assertTrue(filterFactory instanceof ASCIIFoldingFilterFactory
        || filterFactory instanceof LowerCaseFilterFactory);
  }
  assertTrue(chain.getCharFilterFactories() == null);
}
项目:search    文件:ResourceLoaderTest.java   
public void testLoadDeprecatedFactory() throws Exception {
  // Loading a deprecated factory class must still work (with a logged
  // warning); close the loader even if newInstance throws.
  SolrResourceLoader loader = new SolrResourceLoader("solr/collection1");
  try {
    // ensure we get our exception
    loader.newInstance(DeprecatedTokenFilterFactory.class.getName(), TokenFilterFactory.class, null,
        new Class[] { Map.class }, new Object[] { new HashMap<String,String>() });
    // TODO: How to check that a warning was printed to log file?
  } finally {
    loader.close();
  }
}
项目:sdb2    文件:IndexerServiceImpl.java   
@Override
public void index(final IndexType indexType, final Collection<Song> songs) {
    // Indexing runs on the background executor so callers are not blocked.
    executor.execute(new Runnable() {
        @Override
        public void run() {
            Stopwatch stopwatch = Stopwatch.createStarted();

            // In-memory index; published via putIndex() below.
            Directory directory = new RAMDirectory();
            try {
                LOG.debug("available tokenizers: {}", TokenizerFactory.availableTokenizers());
                LOG.debug("available token filters: {}", TokenFilterFactory.availableTokenFilters());
                // Lowercased 1..25-character n-grams enable substring matching.
                Analyzer analyzer = CustomAnalyzer.builder()
                    .withTokenizer("standard")
                    .addTokenFilter("lowercase")
                    .addTokenFilter("ngram", "minGramSize", "1", "maxGramSize", "25")
                    .build();
                IndexWriterConfig config = new IndexWriterConfig(analyzer);
                // try-with-resources closes (and commits) the writer.
                try (IndexWriter writer = new IndexWriter(directory, config)) {
                    for (Song song : songs) {
                        Document document = createDocument(song);
                        writer.addDocument(document);
                        // keep the UUID->Song lookup in sync with the index
                        songByUuid.put(song.getUUID(), song);
                    }
                } catch (IOException e) {
                    LOG.warn("couldn't index songs", e);
                }
            } catch (IOException e1) {
                LOG.warn("couldn't create analyzer", e1);
            } finally {
                // Publish the directory even on failure — NOTE(review): this
                // may register an empty/partial index; confirm that is intended.
                putIndex(indexType, directory);
                stopwatch.stop();
                LOG.info("indexing songs in background thread took {}", stopwatch.toString());
            }
        }
    });
}
项目:community-edition-old    文件:AlfrescoFieldType.java   
/**
 * Collects the multi-term variant of an analysis component, sorting it
 * into the filter, tokenizer, or char-filter slot; non-MultiTermAware
 * inputs are ignored, anything unrecognized is a server error.
 */
public void add(Object current)
{
    if (!(current instanceof MultiTermAwareComponent))
        return;
    AbstractAnalysisFactory component = ((MultiTermAwareComponent) current).getMultiTermComponent();
    if (component instanceof TokenFilterFactory)
    {
        if (filters == null)
        {
            filters = new ArrayList<TokenFilterFactory>(2);
        }
        filters.add((TokenFilterFactory) component);
    }
    else if (component instanceof TokenizerFactory)
    {
        tokenizer = (TokenizerFactory) component;
    }
    else if (component instanceof CharFilterFactory)
    {
        if (charFilters == null)
        {
            charFilters = new ArrayList<CharFilterFactory>(1);
        }
        charFilters.add((CharFilterFactory) component);
    }
    else
    {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + component);
    }
}
项目:NYBC    文件:AnalyzerFactoryTask.java   
/**
 * Resolves a class name that may be a fully qualified name (FQN), an FQN
 * relative to the "org.apache.lucene.analysis." package, or — if it has no
 * period — a short SPI name resolved through the analysis
 * *Factory.lookupClass() methods (e.g. "standard.ClassicTokenizerFactory"
 * -> "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    // Dotted: interpret as an FQN first.
    try {
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException fqnMiss) {
      // Retry with the Lucene analysis package prefix prepended.
      try {
        return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
      } catch (ClassNotFoundException prefixedMiss) {
        throw new ClassNotFoundException("Can't find class '" + className
                                         + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
      }
    }
  }
  // No dot - strip any *Factory-style suffix and go through SPI lookup.
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
项目:NYBC    文件:AnalyzerFactory.java   
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  // The tokenizer is required; filter lists may be empty but are stored as-is.
  assert tokenizerFactory != null;
  this.charFilterFactories = charFilterFactories;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
项目:NYBC    文件:AnalyzerFactory.java   
@Override
public String toString() {
  // Format: AnalyzerFactory(name:…, positionIncrementGap:…, offsetGap:…,
  // <char filters…>, <tokenizer>, <token filters…>); nulls are skipped.
  StringBuilder buf = new StringBuilder("AnalyzerFactory(");
  if (name != null) {
    buf.append("name:").append(name).append(", ");
  }
  if (positionIncrementGap != null) {
    buf.append("positionIncrementGap:").append(positionIncrementGap).append(", ");
  }
  if (offsetGap != null) {
    buf.append("offsetGap:").append(offsetGap).append(", ");
  }
  for (CharFilterFactory charFilter : charFilterFactories) {
    buf.append(charFilter).append(", ");
  }
  buf.append(tokenizerFactory);
  for (TokenFilterFactory tokenFilter : tokenFilterFactories) {
    buf.append(", ").append(tokenFilter);
  }
  return buf.append(')').toString();
}
项目:NYBC    文件:TestFactories.java   
public void test() throws IOException {
  // Exercise every SPI-registered analysis component in turn:
  // tokenizers first, then token filters, finally char filters.
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }
  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }
  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}