Java 类org.apache.lucene.analysis.util.ClasspathResourceLoader 实例源码

项目:IK-Analyzer-2012FF    文件:IKSynonymAnalyzer.java   
/**
 * Builds the analysis chain for a field: an {@link IKTokenizer} over {@code reader},
 * wrapped by the token stream created by a {@link SynonymFilterFactory}.
 *
 * <p>The factory is configured with the analyzer's {@code luceneMatchVersion}, the first
 * extended synonym dictionary reported by {@link DefaultConfig}, and
 * {@code ignoreCase=true}; it is informed with a {@link ClasspathResourceLoader}.
 *
 * @param fieldName name of the field being analyzed; not consulted here
 * @param reader    source of the text to tokenize
 * @return the tokenizer paired with the stream the synonym factory creates on top of it
 * @throws IllegalStateException if informing the synonym factory fails with an I/O error
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer token = new IKTokenizer(reader, useSmart);
    Configuration cfg = DefaultConfig.getInstance();
    String synonyms = cfg.getExtSynonymDictionarys().get(0);

    Map<String, String> paramsMap = new HashMap<String, String>();
    paramsMap.put("luceneMatchVersion", luceneMatchVersion.toString());
    paramsMap.put("synonyms", synonyms);
    paramsMap.put("ignoreCase", "true");

    SynonymFilterFactory factory = new SynonymFilterFactory(paramsMap);
    ResourceLoader loader = new ClasspathResourceLoader();
    try {
        factory.inform(loader);
    } catch (IOException e) {
        // Do not continue with a factory whose inform() failed: surface the problem
        // here, with its cause, instead of printing it and building a broken chain.
        throw new IllegalStateException("Failed to load synonym dictionary: " + synonyms, e);
    }
    return new TokenStreamComponents(token, factory.create(token));
}
项目:search    文件:TestKeepFilterFactory.java   
/**
 * Creates "KeepWord" factories from one word file and from two word files and checks
 * the size of the word set each of them reports.
 */
public void testInform() throws Exception {
  final ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", resourceLoader != null);

  // One word file.
  KeepWordFilterFactory keepFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord", "words", "keep-1.txt", "ignoreCase", "true");
  CharArraySet keepSet = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepSet != null);
  assertTrue("words Size: " + keepSet.size() + " is not: " + 2, keepSet.size() == 2);

  // Two word files passed as one comma-separated argument.
  keepFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord", "words", "keep-1.txt, keep-2.txt", "ignoreCase", "true");
  keepSet = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepSet != null);
  assertTrue("words Size: " + keepSet.size() + " is not: " + 4, keepSet.size() == 4);
}
项目:solr-jdbc    文件:JdbcStopFilterFactoryTest.java   
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 *
 * <p>Feeds four whitespace-separated terms through the stream created by the factory
 * and expects only {@code test1} and {@code test2} to come out.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");

   // White space tokenizer, to lower case tokenizer.
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));

   JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
   factory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals("test1", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("test2", attribute.toString());
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
项目:NYBC    文件:TestPhoneticFilterFactory.java   
/**
 * Default case: re-initializes one factory three times while the argument map grows
 * (encoder only, then inject=false, then a maximum code length of 2).
 */
public void testFactory() throws IOException {
  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory();
  final Map<String,String> config = new HashMap<String, String>();

  // Encoder only.
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  phonetic.init(config);
  phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  assertTrue(phonetic.inject); // default

  // Injection switched off.
  config.put(PhoneticFilterFactory.INJECT, "false");
  phonetic.init(config);
  phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
  assertFalse(phonetic.inject);

  // Maximum code length handed to the encoder.
  config.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");
  phonetic.init(config);
  phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
  Metaphone metaphone = (Metaphone) phonetic.getEncoder();
  assertEquals(2, metaphone.getMaxCodeLen());
}
项目:NYBC    文件:TestHyphenationCompoundWordTokenFilterFactory.java   
/**
 * Checks the factory when it is given both a hyphenation grammar and a dictionary,
 * with every other option left at its default.
 */
public void testHyphenationWithDictionary() throws Exception {
  Tokenizer source = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"), MockTokenizer.WHITESPACE, false);

  Map<String,String> config = new HashMap<String,String>();
  config.put("hyphenator", "da_UTF8.xml");
  config.put("dictionary", "da_compoundDictionary.txt");

  HyphenationCompoundWordTokenFilterFactory compoundFactory = new HyphenationCompoundWordTokenFilterFactory();
  compoundFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  compoundFactory.init(config);
  compoundFactory.inform(new ClasspathResourceLoader(getClass()));

  TokenStream decompounded = compoundFactory.create(source);
  String[] expectedTerms = { "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest" };
  int[] expectedIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(decompounded, expectedTerms, expectedIncrements);
}
项目:NYBC    文件:TestHyphenationCompoundWordTokenFilterFactory.java   
/**
 * Checks the factory with a hyphenation grammar and no dictionary.
 * The minimum and maximum subword sizes are changed from their defaults; without a
 * dictionary that tweak is generally needed to avoid lots of expansions.
 */
public void testHyphenationOnly() throws Exception {
  Tokenizer source = new MockTokenizer(
      new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);

  Map<String,String> config = new HashMap<String,String>();
  config.put("hyphenator", "da_UTF8.xml");
  config.put("minSubwordSize", "2");
  config.put("maxSubwordSize", "4");

  HyphenationCompoundWordTokenFilterFactory compoundFactory = new HyphenationCompoundWordTokenFilterFactory();
  compoundFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  compoundFactory.init(config);
  compoundFactory.inform(new ClasspathResourceLoader(getClass()));

  TokenStream decompounded = compoundFactory.create(source);
  String[] expectedTerms = { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" };
  assertTokenStreamContents(decompounded, expectedTerms);
}
项目:NYBC    文件:TestCommonGramsFilterFactory.java   
/**
 * With no words configured the factory falls back to a default set of English
 * stopwords; the test checks that set contains "the" and verifies the emitted grams.
 */
public void testDefaults() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestStopFilter.class);
  assertTrue("loader is null and it shouldn't be", resources != null);

  final Map<String, String> noArgs = Collections.emptyMap();
  final CommonGramsFilterFactory gramsFactory = new CommonGramsFilterFactory();
  gramsFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  gramsFactory.init(noArgs);
  gramsFactory.inform(resources);

  final CharArraySet commonWords = gramsFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue(commonWords.contains("the"));

  Tokenizer source = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
  TokenStream grams = gramsFactory.create(source);
  String[] expectedTerms = { "testing", "testing_the", "the", "the_factory", "factory" };
  assertTokenStreamContents(grams, expectedTerms);
}
项目:NYBC    文件:TestCommonGramsQueryFilterFactory.java   
/**
 * With no words configured the query-side factory falls back to a default set of
 * English stopwords; the test checks that set contains "the" and verifies the output.
 */
public void testDefaults() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestStopFilter.class);
  assertTrue("loader is null and it shouldn't be", resources != null);

  final Map<String, String> noArgs = Collections.emptyMap();
  final CommonGramsQueryFilterFactory queryFactory = new CommonGramsQueryFilterFactory();
  queryFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  queryFactory.init(noArgs);
  queryFactory.inform(resources);

  final CharArraySet commonWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue(commonWords.contains("the"));

  Tokenizer source = new MockTokenizer(new StringReader("testing the factory"), MockTokenizer.WHITESPACE, false);
  TokenStream grams = queryFactory.create(source);
  String[] expectedTerms = { "testing_the", "the_factory" };
  assertTokenStreamContents(grams, expectedTerms);
}
项目:NYBC    文件:TestTypeTokenFilterFactory.java   
/**
 * Configures the factory from one types file and then, with a fresh factory, from two
 * types files; checks the number of stop types and the parsed
 * {@code enablePositionIncrements} flag each time.
 */
@Test
public void testInform() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(getClass());
  final Map<String, String> config = new HashMap<String, String>();

  // First pass: one types file, position increments enabled.
  TypeTokenFilterFactory typeFactory = new TypeTokenFilterFactory();
  config.put("types", "stoptypes-1.txt");
  config.put("enablePositionIncrements", "true");
  typeFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  typeFactory.init(config);
  typeFactory.inform(resources);
  Set<String> stopTypes = typeFactory.getStopTypes();
  assertTrue("types is null and it shouldn't be", stopTypes != null);
  assertTrue("types Size: " + stopTypes.size() + " is not: " + 2, stopTypes.size() == 2);
  assertTrue("enablePositionIncrements was set to true but not correctly parsed", typeFactory.isEnablePositionIncrements());

  // Second pass: new factory, same map with two entries overwritten and one added.
  typeFactory = new TypeTokenFilterFactory();
  config.put("types", "stoptypes-1.txt, stoptypes-2.txt");
  config.put("enablePositionIncrements", "false");
  config.put("useWhitelist","true");
  typeFactory.init(config);
  typeFactory.inform(resources);
  stopTypes = typeFactory.getStopTypes();
  assertTrue("types is null and it shouldn't be", stopTypes != null);
  assertTrue("types Size: " + stopTypes.size() + " is not: " + 4, stopTypes.size() == 4);
  assertTrue("enablePositionIncrements was set to false but not correctly parsed", !typeFactory.isEnablePositionIncrements());
}
项目:solr-jdbc-synonyms    文件:JdbcStopFilterFactoryTest.java   
/**
 * Test for {@link JdbcStopFilterFactory#create(TokenStream)}.
 *
 * <p>Feeds four whitespace-separated terms through the stream created by the factory
 * and expects only {@code test1} and {@code test2} to come out.
 */
@Test
public void create() throws Exception {
   Map<String, String> args = new HashMap<>();
   args.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LUCENE_5_0_0.toString());
   args.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   args.put(JdbcReaderFactoryParams.SQL, "select stopword from stopwords");

   // White space tokenizer, to lower case tokenizer.
   MockTokenizer tokenizer = new MockTokenizer();
   tokenizer.setReader(new StringReader("test1 somestring test2 anotherstring"));

   JdbcStopFilterFactory factory = new JdbcStopFilterFactory(args);
   factory.inform(new ClasspathResourceLoader());

   try (TokenStream stream = factory.create(tokenizer)) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      assertTrue(stream.incrementToken());
      assertEquals("test1", attribute.toString());
      assertTrue(stream.incrementToken());
      assertEquals("test2", attribute.toString());
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestKeepFilterFactory.java   
/**
 * Builds "KeepWord" factories for a single word file and for a pair of word files and
 * verifies how many words each resulting set holds.
 */
public void testInform() throws Exception {
  final ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", classpathLoader != null);

  // Single file.
  KeepWordFilterFactory keepWordFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord", "words", "keep-1.txt", "ignoreCase", "true");
  CharArraySet kept = keepWordFactory.getWords();
  assertTrue("words is null and it shouldn't be", kept != null);
  assertTrue("words Size: " + kept.size() + " is not: " + 2, kept.size() == 2);

  // Two files, comma-separated.
  keepWordFactory = (KeepWordFilterFactory) tokenFilterFactory(
      "KeepWord", "words", "keep-1.txt, keep-2.txt", "ignoreCase", "true");
  kept = keepWordFactory.getWords();
  assertTrue("words is null and it shouldn't be", kept != null);
  assertTrue("words Size: " + kept.size() + " is not: " + 4, kept.size() == 4);
}
项目:search    文件:TestICUTokenizerFactory.java   
/**
 * Tokenizes text mixing Thai, English and Lao with a factory created from an empty
 * argument map and checks the resulting terms.
 */
public void testMixedText() throws Exception {
  final Reader mixedInput = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(new HashMap<String,String>());
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(newAttributeFactory(), mixedInput);
  final String[] expectedTerms = {
      "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
      "This", "is", "a", "test", "ກວ່າ", "ດອກ" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:search    文件:TestICUTokenizerFactory.java   
/**
 * Uses a custom rule file for the Latin script and checks which hyphenated words
 * stay in one piece.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  final Reader hyphenated = new StringReader(
      "One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
  final Map<String,String> config = new HashMap<>();
  config.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(config);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(newAttributeFactory(), hyphenated);
  final String[] expectedTerms = {
      "One-two", "punch",
      "Brang", "not", "brung-it",
      "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:search    文件:TestICUTokenizerFactory.java   
/**
 * Supplies more than one script/rule-file pair, reusing one rule file for both
 * scripts, which overrides the default DefaultICUTokenizerConfig tokenization of Thai.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  final Reader multiScript = new StringReader(
      "Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
  final Map<String,String> config = new HashMap<>();
  config.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(config);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(newAttributeFactory(), multiScript);
  final String[] expectedTerms = {
      "Some", "English",
      "Немного русский.  ",
      "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
      "More", "English" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * Default case: only the encoder is configured; the encoder type and the
 * {@code inject} flag are checked.
 */
public void testFactoryDefaults() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  assertTrue(phonetic.inject); // default
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * Passing inject=false must leave the factory's {@code inject} flag off.
 */
public void testInjectFalse() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  config.put(PhoneticFilterFactory.INJECT, "false");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertFalse(phonetic.inject);
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * A configured maximum code length of 2 must be visible on the Metaphone encoder.
 */
public void testMaxCodeLength() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  config.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));

  final Metaphone metaphone = (Metaphone) phonetic.getEncoder();
  assertEquals(2, metaphone.getMaxCodeLen());
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * An unknown short encoder name must be rejected with an IllegalArgumentException
 * whose message contains "Error loading encoder".
 */
public void testUnknownEncoder() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put("encoder", "XXX");
  try {
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
    phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
    fail();
  } catch (IllegalArgumentException expected) {
    String message = expected.getMessage();
    assertTrue(message.contains("Error loading encoder"));
  }
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * A fully qualified encoder name that does not exist must be rejected with an
 * IllegalArgumentException whose message contains "Error loading encoder".
 */
public void testUnknownEncoderReflection() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put("encoder", "org.apache.commons.codec.language.NonExistence");
  try {
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
    phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
    fail();
  } catch (IllegalArgumentException expected) {
    String message = expected.getMessage();
    assertTrue(message.contains("Error loading encoder"));
  }
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * Reflection case: the encoder is named by its fully qualified class name.
 */
public void testFactoryReflection() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  assertTrue(phonetic.inject); // default
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * "Caverphone2" is used because the REGISTRY knows that encoder as Caverphone, so
 * this effectively exercises reflection without a package name.
 */
public void testFactoryReflectionCaverphone2() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Caverphone2");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  assertTrue(phonetic.inject); // default
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * The encoder name "Caverphone" must yield a Caverphone2 encoder, with injection
 * left at its default.
 */
public void testFactoryReflectionCaverphone() throws IOException {
  final Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Caverphone");

  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  assertTrue(phonetic.inject); // default
}
项目:search    文件:TestPhoneticFilterFactory.java   
/**
 * Runs {@code input} through a whitespace MockTokenizer and a phonetic filter built
 * for the given encoder, then asserts the produced terms.
 *
 * @param algName  value for the "encoder" argument
 * @param inject   value for the "inject" argument
 * @param input    text to tokenize
 * @param expected terms the filtered stream must produce
 */
static void assertAlgorithm(String algName, String inject, String input,
    String[] expected) throws Exception {
  Tokenizer source = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);

  Map<String,String> config = new HashMap<>();
  config.put("encoder", algName);
  config.put("inject", inject);

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTokenStreamContents(phonetic.create(source), expected);
}
项目:search    文件:TestSynonymFilterFactory.java   
/**
 * Checks that the solr synonym file can be parsed and applied by the old
 * implementation, selected here through match version LUCENE_3_3.
 * @deprecated Remove this test in Lucene 5.0
 */
@Deprecated
public void testSynonymsOld() throws Exception {
  Reader input = new StringReader("GB");
  TokenStream tokens = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
  tokens = tokenFilterFactory(
      "Synonym", Version.LUCENE_3_3, new ClasspathResourceLoader(getClass()),
      "synonyms", "synonyms.txt")
      .create(tokens);
  assertTrue(tokens instanceof SlowSynonymFilter);

  String[] expectedTerms = { "GB", "gib", "gigabyte", "gigabytes" };
  int[] expectedIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(tokens, expectedTerms, expectedIncrements);
}
项目:search    文件:TestUAX29URLEmailTokenizerFactory.java   
/**
 * Tokenizes the same one-character input with the default factory and with one pinned
 * to LUCENE_3_1, whose output differs (the old broken behavior).
 * @deprecated nuke this test in lucene 5.0
 */
@Deprecated
public void testMatchVersion() throws Exception {
  // Current behavior.
  Reader input = new StringReader("ざ");
  TokenStream tokens = tokenizerFactory("UAX29URLEmail").create(input);
  String[] currentTerms = {"ざ"};
  assertTokenStreamContents(tokens, currentTerms);

  // Same input under the 3.1 match version.
  input = new StringReader("ざ");
  tokens = tokenizerFactory("UAX29URLEmail", Version.LUCENE_3_1, new ClasspathResourceLoader(getClass()))
      .create(input);
  String[] legacyTerms = {"さ"}; // old broken behavior
  assertTokenStreamContents(tokens, legacyTerms);
}
项目:search    文件:TestCommonGramsFilterFactory.java   
/**
 * Builds "CommonGrams" factories from one word file, from two word files and from a
 * snowball-format file, checking the loaded common words each time.
 */
public void testInform() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestStopFilter.class);
  assertTrue("loader is null and it shouldn't be", resources != null);

  // One word file.
  CommonGramsFilterFactory gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", TEST_VERSION_CURRENT, resources,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet commonWords = gramsFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 2, commonWords.size() == 2);
  assertTrue(gramsFactory.isIgnoreCase() + " does not equal: " + true, gramsFactory.isIgnoreCase());

  // Two word files, comma-separated.
  gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", TEST_VERSION_CURRENT, resources,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  commonWords = gramsFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 4, commonWords.size() == 4);
  assertTrue(gramsFactory.isIgnoreCase() + " does not equal: " + true, gramsFactory.isIgnoreCase());

  // Snowball-format word file.
  gramsFactory = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", TEST_VERSION_CURRENT, resources,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  commonWords = gramsFactory.getCommonWords();
  assertEquals(8, commonWords.size());
  for (String pronoun : new String[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" }) {
    assertTrue(commonWords.contains(pronoun));
  }
}
项目:search    文件:TestCommonGramsQueryFilterFactory.java   
/**
 * Builds "CommonGramsQuery" factories from one word file, from two word files and
 * from a snowball-format file, checking the loaded common words each time.
 */
public void testInform() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestStopFilter.class);
  assertTrue("loader is null and it shouldn't be", resources != null);

  // One word file.
  CommonGramsQueryFilterFactory queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", TEST_VERSION_CURRENT, resources,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet commonWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 2, commonWords.size() == 2);
  assertTrue(queryFactory.isIgnoreCase() + " does not equal: " + true, queryFactory.isIgnoreCase());

  // Two word files, comma-separated.
  queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", TEST_VERSION_CURRENT, resources,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  commonWords = queryFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", commonWords != null);
  assertTrue("words Size: " + commonWords.size() + " is not: " + 4, commonWords.size() == 4);
  assertTrue(queryFactory.isIgnoreCase() + " does not equal: " + true, queryFactory.isIgnoreCase());

  // Snowball-format word file.
  queryFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", TEST_VERSION_CURRENT, resources,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  commonWords = queryFactory.getCommonWords();
  assertEquals(8, commonWords.size());
  for (String pronoun : new String[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" }) {
    assertTrue(commonWords.contains(pronoun));
  }
}
项目:search    文件:TestLengthFilterFactory.java   
/**
 * Applies a "Length" filter (min 4, max 10, position increments disabled, match
 * version LUCENE_4_3) to three whitespace-separated terms and expects only "foobar".
 */
public void test() throws Exception {
  Reader input = new StringReader("foo foobar super-duper-trooper");
  TokenStream tokens = new MockTokenizer(input, MockTokenizer.WHITESPACE, false);
  tokens = tokenFilterFactory(
      "Length", Version.LUCENE_4_3, new ClasspathResourceLoader(getClass()),
      "min", "4",
      "max", "10",
      "enablePositionIncrements", "false")
      .create(tokens);

  String[] expectedTerms = { "foobar" };
  int[] expectedIncrements = { 1 };
  assertTokenStreamContents(tokens, expectedTerms, expectedIncrements);
}
项目:search    文件:TestStopFilterFactory.java   
/**
 * Builds "Stop" factories from one word file, two word files, a snowball-format file
 * and finally with no arguments, checking the stop words and ignore-case flag.
 */
public void testInform() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", resources != null);

  // One word file.
  StopFilterFactory stopFactory = (StopFilterFactory) tokenFilterFactory(
      "Stop", "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + 2, stopWords.size() == 2);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // Two word files, comma-separated.
  stopFactory = (StopFilterFactory) tokenFilterFactory(
      "Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + 4, stopWords.size() == 4);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // Snowball-format word file.
  stopFactory = (StopFilterFactory) tokenFilterFactory(
      "Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertEquals(8, stopWords.size());
  for (String pronoun : new String[] { "he", "him", "his", "himself", "she", "her", "hers", "herself" }) {
    assertTrue(stopWords.contains(pronoun));
  }

  // defaults
  stopFactory = (StopFilterFactory) tokenFilterFactory("Stop");
  assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
  assertEquals(false, stopFactory.isIgnoreCase());
}
项目:solr-jdbc    文件:JdbcSynonymFilterFactoryTest.java   
/**
 * Test for {@link JdbcSynonymFilterFactory#create(TokenStream)}: two input terms are
 * expected to come out as {@code testA} through {@code testD}.
 */
@Test
public void create() throws Exception {
   Map<String, String> config = new HashMap<>();
   config.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, Version.LATEST.toString());
   config.put(JdbcReaderFactoryParams.DATASOURCE, "java:comp/env/dataSource");
   config.put(JdbcReaderFactoryParams.SQL, "select synonyms from synonyms");

   // White space tokenizer, to lower case tokenizer.
   MockTokenizer source = new MockTokenizer();
   source.setReader(new StringReader("test1 test2"));

   JdbcSynonymFilterFactory synonymFactory = new JdbcSynonymFilterFactory(config);
   synonymFactory.inform(new ClasspathResourceLoader(getClass().getClassLoader()));

   try (TokenStream stream = synonymFactory.create(source)) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      for (String expectedTerm : new String[] { "testA", "testB", "testC", "testD" }) {
         assertTrue(stream.incrementToken());
         assertEquals(expectedTerm, term.toString());
      }
      assertFalse(stream.incrementToken());
      stream.end();
   }
}
项目:solr-jdbc    文件:JdbcResourceLoaderTest.java   
/**
 * Opens the {@link JdbcResourceLoader#DATABASE} resource of a loader backed by a
 * {@code TestJdbcReader} and checks that its content is the text given to the reader.
 */
@Test
public void openResource() throws Exception {
   ClasspathResourceLoader parent = new ClasspathResourceLoader(getClass().getClassLoader());
   JdbcReader reader = new TestJdbcReader("test=>test1,test2");
   Charset charset = Charset.forName("UTF-8");

   JdbcResourceLoader loader = new JdbcResourceLoader(parent, reader, charset);

   StringWriter writer = new StringWriter();
   // Close the stream once it has been copied, even if the copy fails.
   try (InputStream resource = loader.openResource(JdbcResourceLoader.DATABASE)) {
      IOUtils.copy(resource, writer, charset);
   }

   assertEquals("test=>test1,test2", writer.toString());
}
项目:NYBC    文件:TestICUTokenizerFactory.java   
/**
 * Tokenizes text mixing Thai, English and Lao with a factory initialized from an
 * empty argument map and checks the resulting terms.
 */
public void testMixedText() throws Exception {
  final Reader mixedInput = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory();
  icuFactory.init(new HashMap<String,String>());
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(mixedInput);
  final String[] expectedTerms = {
      "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
      "This", "is", "a", "test", "ກວ່າ", "ດອກ" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:NYBC    文件:TestICUTokenizerFactory.java   
/**
 * Uses a custom rule file for the Latin script and checks which hyphenated words
 * stay in one piece.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  final Reader hyphenated = new StringReader(
      "One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory();
  final Map<String,String> config = new HashMap<String,String>();
  config.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
  icuFactory.init(config);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(hyphenated);
  final String[] expectedTerms = {
      "One-two", "punch",
      "Brang", "not", "brung-it",
      "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:NYBC    文件:TestICUTokenizerFactory.java   
/**
 * Supplies more than one script/rule-file pair, reusing one rule file for both
 * scripts, which overrides the default DefaultICUTokenizerConfig tokenization of Thai.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  final Reader multiScript = new StringReader(
      "Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");
  final ICUTokenizerFactory icuFactory = new ICUTokenizerFactory();
  final Map<String,String> config = new HashMap<String,String>();
  config.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
  icuFactory.init(config);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));
  final TokenStream tokens = icuFactory.create(multiScript);
  final String[] expectedTerms = {
      "Some", "English",
      "Немного русский.  ",
      "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
      "More", "English" };
  assertTokenStreamContents(tokens, expectedTerms);
}
项目:NYBC    文件:TestPhoneticFilterFactory.java   
/**
 * Reflection case: re-initializes one factory with three encoder names (a fully
 * qualified class name, "Caverphone2" and "Caverphone") and checks the encoder type.
 */
public void testFactoryCaseReflection() throws IOException {
  final PhoneticFilterFactory phonetic = new PhoneticFilterFactory();
  final ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  final Map<String,String> config = new HashMap<String, String>();

  // Fully qualified class name.
  config.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");
  phonetic.init(config);
  phonetic.inform(resources);
  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  assertTrue(phonetic.inject); // default

  // "Caverphone2" is registered in the REGISTRY as Caverphone, so this effectively
  // tests reflection without a package name.
  config.put(PhoneticFilterFactory.ENCODER, "Caverphone2");
  phonetic.init(config);
  phonetic.inform(resources);
  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  assertTrue(phonetic.inject); // default

  // Cross check with the registry.
  config.put(PhoneticFilterFactory.ENCODER, "Caverphone");
  phonetic.init(config);
  phonetic.inform(resources);
  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  assertTrue(phonetic.inject); // default
}
项目:NYBC    文件:TestPhoneticFilterFactory.java   
/**
 * Runs {@code input} through a whitespace MockTokenizer and a phonetic filter
 * initialized for the given encoder, then asserts the produced terms.
 *
 * @param algName  value for the "encoder" argument
 * @param inject   value for the "inject" argument
 * @param input    text to tokenize
 * @param expected terms the filtered stream must produce
 */
static void assertAlgorithm(String algName, String inject, String input,
    String[] expected) throws Exception {
  Tokenizer source = new MockTokenizer(new StringReader(input), MockTokenizer.WHITESPACE, false);

  Map<String,String> config = new HashMap<String,String>();
  config.put("encoder", algName);
  config.put("inject", inject);

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory();
  phonetic.init(config);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTokenStreamContents(phonetic.create(source), expected);
}
项目:NYBC    文件:TestSynonymFilterFactory.java   
/** Checks that the solr synonym file can be parsed and applied. */
public void testSynonyms() throws Exception {
  final Map<String,String> config = new HashMap<String,String>();
  config.put("synonyms", "synonyms.txt");

  final SynonymFilterFactory synonymFactory = new SynonymFilterFactory();
  synonymFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
  synonymFactory.init(config);
  synonymFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer source = new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false);
  TokenStream expanded = synonymFactory.create(source);
  assertTrue(expanded instanceof SynonymFilter);

  String[] expectedTerms = { "GB", "gib", "gigabyte", "gigabytes" };
  int[] expectedIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(expanded, expectedTerms, expectedIncrements);
}
项目:NYBC    文件:TestSynonymFilterFactory.java   
/**
 * Checks that the solr synonym file can be parsed and applied by the old
 * implementation, selected here through match version LUCENE_33.
 * @deprecated Remove this test in Lucene 5.0
 */
@Deprecated
public void testSynonymsOld() throws Exception {
  final Map<String,String> config = new HashMap<String,String>();
  config.put("synonyms", "synonyms.txt");

  final SynonymFilterFactory synonymFactory = new SynonymFilterFactory();
  synonymFactory.setLuceneMatchVersion(Version.LUCENE_33);
  synonymFactory.init(config);
  synonymFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer source = new MockTokenizer(new StringReader("GB"), MockTokenizer.WHITESPACE, false);
  TokenStream expanded = synonymFactory.create(source);
  assertTrue(expanded instanceof SlowSynonymFilter);

  String[] expectedTerms = { "GB", "gib", "gigabyte", "gigabytes" };
  int[] expectedIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(expanded, expectedTerms, expectedIncrements);
}