Example source code for the Java class org.apache.lucene.analysis.core.KeywordTokenizer
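
KeywordTokenizer emits the entire input as a single token, which is why the tests collected below use it to feed a token filter exactly one term at a time. A minimal, self-contained sketch of that pattern (current Lucene API; the class name and input string are chosen for illustration):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordTokenizerDemo {
  public static void main(String[] args) throws IOException {
    // KeywordTokenizer treats the whole input as one token.
    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader("foo bar"));

    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term.toString()); // prints "foo bar" exactly once
    }
    tokenizer.end();
    tokenizer.close();
  }
}

Because whitespace and punctuation are preserved, any downstream filter sees the full field value as one term.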

Project: elasticsearch_my    File: SimplePolishTokenFilterTests.java
private void testToken(String source, String expected) throws IOException {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put("index.analysis.filter.myStemmer.type", "polish_stem")
            .build();
    TestAnalysis analysis = createTestAnalysis(index, settings, new AnalysisStempelPlugin());

    TokenFilterFactory filterFactory = analysis.tokenFilter.get("myStemmer");

    Tokenizer tokenizer = new KeywordTokenizer();
    tokenizer.setReader(new StringReader(source));
    TokenStream ts = filterFactory.create(tokenizer);

    CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    assertThat(ts.incrementToken(), equalTo(true));

    assertThat(term1.toString(), equalTo(expected));
}
Project: search    File: TestICUCollationKeyFilterFactory.java
public void testIgnoreWhitespace() throws Exception {
  String withSpace = "foo bar";
  String withoutSpace = "foobar";
  String withPunctuation = "foo-bar";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "alternate", "shifted",
      "variableTop", " ");
  TokenStream tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithoutSpace = factory.create(
      new KeywordTokenizer(new StringReader(withoutSpace)));
  assertCollatesToSame(tsWithSpace, tsWithoutSpace);
  // now assert that punctuation still matters: foo-bar < foo bar
  tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithPunctuation = factory.create(
      new KeywordTokenizer(new StringReader(withPunctuation)));
  assertCollation(tsWithPunctuation, tsWithSpace, -1);
}
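
The collation assertions used in these tests are helpers defined in the test class itself. A plausible sketch of what they check, assuming each stream carries exactly one collation-key token (a hypothetical reimplementation, not the actual Lucene test source):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import static org.junit.Assert.*;

// Collation keys are encoded so that their natural String order matches collator order.
static void assertCollatesToSame(TokenStream ts1, TokenStream ts2) throws IOException {
  assertCollation(ts1, ts2, 0);
}

static void assertCollation(TokenStream ts1, TokenStream ts2, int comparison) throws IOException {
  CharTermAttribute term1 = ts1.addAttribute(CharTermAttribute.class);
  CharTermAttribute term2 = ts2.addAttribute(CharTermAttribute.class);
  ts1.reset();
  ts2.reset();
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  assertEquals(Integer.signum(comparison),
      Integer.signum(term1.toString().compareTo(term2.toString())));
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  ts1.end();
  ts2.end();
  ts1.close();
  ts2.close();
}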
Project: search    File: TestSynonymMapFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
  }
}
Project: search    File: EdgeNGramTokenFilterTest.java
public void testSupplementaryCharacters() throws IOException {
  final String s = TestUtil.randomUnicodeString(random(), 10);
  final int codePointCount = s.codePointCount(0, s.length());
  final int minGram = TestUtil.nextInt(random(), 1, 3);
  final int maxGram = TestUtil.nextInt(random(), minGram, 10);
  TokenStream tk = new KeywordTokenizer(new StringReader(s));
  tk = new EdgeNGramTokenFilter(tk, minGram, maxGram);
  final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
  tk.reset();
  for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
    assertTrue(tk.incrementToken());
    assertEquals(0, offsetAtt.startOffset());
    assertEquals(s.length(), offsetAtt.endOffset());
    final int end = Character.offsetByCodePoints(s, 0, i);
    assertEquals(s.substring(0, end), termAtt.toString());
  }
  assertFalse(tk.incrementToken());
}
Project: search    File: NGramTokenFilterTest.java
public void testSupplementaryCharacters() throws IOException {
  final String s = TestUtil.randomUnicodeString(random(), 10);
  final int codePointCount = s.codePointCount(0, s.length());
  final int minGram = TestUtil.nextInt(random(), 1, 3);
  final int maxGram = TestUtil.nextInt(random(), minGram, 10);
  TokenStream tk = new KeywordTokenizer(new StringReader(s));
  tk = new NGramTokenFilter(tk, minGram, maxGram);
  final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
  tk.reset();
  for (int start = 0; start < codePointCount; ++start) {
    for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int startIndex = Character.offsetByCodePoints(s, 0, start);
      final int endIndex = Character.offsetByCodePoints(s, 0, end);
      assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
    }
  }
  assertFalse(tk.incrementToken());
}
Project: search    File: TestLucene47WordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new Lucene47WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: search    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: search    File: TestCodepointCountFilter.java
public void testRandomStrings() throws IOException {
  for (int i = 0; i < 10000; i++) {
    String text = TestUtil.randomUnicodeString(random(), 100);
    int min = TestUtil.nextInt(random(), 0, 100);
    int max = TestUtil.nextInt(random(), 0, 100);
    int count = text.codePointCount(0, text.length());
    if (min > max) {
      int temp = min;
      min = max;
      max = temp;
    }
    boolean expected = count >= min && count <= max;
    TokenStream stream = new KeywordTokenizer(new StringReader(text));
    stream = new CodepointCountFilter(stream, min, max);
    stream.reset();
    assertEquals(expected, stream.incrementToken());
    stream.end();
    stream.close();
  }
}
Project: search    File: TestSnowballVocab.java
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the corresponding string in output.txt.
 */
private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
    throws IOException {
  if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);

  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      Tokenizer t = new KeywordTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };

  assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"), 
      dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
}
Project: NYBC    File: TestICUCollationKeyFilterFactory.java
public void testNormalization() throws IOException {
  String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
  String turkishLowerCase = "ı will use turkish casıng";
  ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("locale", "tr");
  args.put("strength", "primary");
  args.put("decomposition", "canonical");
  factory.init(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(turkishUpperCase)));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(turkishLowerCase)));
  assertCollatesToSame(tsUpper, tsLower);
}
Project: NYBC    File: TestICUCollationKeyFilterFactory.java
public void testSecondaryStrength() throws IOException {
  String upperCase = "TESTING";
  String lowerCase = "testing";
  ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("locale", "en");
  args.put("strength", "secondary");
  args.put("decomposition", "no");
  factory.init(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(upperCase)));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(lowerCase)));
  assertCollatesToSame(tsUpper, tsLower);
}
Project: NYBC    File: TestICUCollationKeyFilterFactory.java
public void testIgnorePunctuation() throws IOException {
  String withPunctuation = "foo-bar";
  String withoutPunctuation = "foo bar";
  ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("locale", "en");
  args.put("strength", "primary");
  args.put("alternate", "shifted");
  factory.init(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream tsPunctuation = factory.create(
      new KeywordTokenizer(new StringReader(withPunctuation)));
  TokenStream tsWithoutPunctuation = factory.create(
      new KeywordTokenizer(new StringReader(withoutPunctuation)));
  assertCollatesToSame(tsPunctuation, tsWithoutPunctuation);
}
Project: NYBC    File: TestICUCollationKeyFilterFactory.java
public void testIgnoreWhitespace() throws IOException {
  String withSpace = "foo bar";
  String withoutSpace = "foobar";
  String withPunctuation = "foo-bar";
  ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("locale", "en");
  args.put("strength", "primary");
  args.put("alternate", "shifted");
  args.put("variableTop", " ");
  factory.init(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithoutSpace = factory.create(
      new KeywordTokenizer(new StringReader(withoutSpace)));
  assertCollatesToSame(tsWithSpace, tsWithoutSpace);
  // now assert that punctuation still matters: foo-bar < foo bar
  tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithPunctuation = factory.create(
      new KeywordTokenizer(new StringReader(withPunctuation)));
  assertCollation(tsWithPunctuation, tsWithSpace, -1);
}
Project: NYBC    File: TestICUCollationKeyFilterFactory.java
public void testUpperCaseFirst() throws IOException {
  String lower = "resume";
  String upper = "Resume";
  ICUCollationKeyFilterFactory factory = new ICUCollationKeyFilterFactory();
  Map<String,String> args = new HashMap<String,String>();
  args.put("locale", "en");
  args.put("strength", "tertiary");
  args.put("caseFirst", "upper");
  factory.init(args);
  factory.inform(new StringMockResourceLoader(""));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(lower)));
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(upper)));
  assertCollation(tsUpper, tsLower, -1);
}
Project: NYBC    File: TestSynonymMapFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
  }
}
Project: NYBC    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: NYBC    File: TestSnowballVocab.java
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the corresponding string in output.txt.
 */
private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
    throws IOException {
  if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);

  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      Tokenizer t = new KeywordTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };

  assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"), 
      dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
}
Project: elasticsearch-analysis-worddelimiter2    File: TestWordDelimiterFilter2.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 1024; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter2(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestICUCollationKeyFilterFactory.java
public void testIgnoreWhitespace() throws Exception {
  String withSpace = "foo bar";
  String withoutSpace = "foobar";
  String withPunctuation = "foo-bar";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "primary",
      "alternate", "shifted",
      "variableTop", " ");
  TokenStream tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithoutSpace = factory.create(
      new KeywordTokenizer(new StringReader(withoutSpace)));
  assertCollatesToSame(tsWithSpace, tsWithoutSpace);
  // now assert that punctuation still matters: foo-bar < foo bar
  tsWithSpace = factory.create(
      new KeywordTokenizer(new StringReader(withSpace)));
  TokenStream tsWithPunctuation = factory.create(
      new KeywordTokenizer(new StringReader(withPunctuation)));
  assertCollation(tsWithPunctuation, tsWithSpace, -1);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSynonymMapFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: EdgeNGramTokenFilterTest.java
public void testSupplementaryCharacters() throws IOException {
  final String s = _TestUtil.randomUnicodeString(random(), 10);
  final int codePointCount = s.codePointCount(0, s.length());
  final int minGram = _TestUtil.nextInt(random(), 1, 3);
  final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
  TokenStream tk = new KeywordTokenizer(new StringReader(s));
  tk = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
  final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
  tk.reset();
  for (int i = minGram; i <= Math.min(codePointCount, maxGram); ++i) {
    assertTrue(tk.incrementToken());
    assertEquals(0, offsetAtt.startOffset());
    assertEquals(s.length(), offsetAtt.endOffset());
    final int end = Character.offsetByCodePoints(s, 0, i);
    assertEquals(s.substring(0, end), termAtt.toString());
  }
  assertFalse(tk.incrementToken());
}
Project: Maskana-Gestor-de-Conocimiento    File: NGramTokenFilterTest.java
public void testSupplementaryCharacters() throws IOException {
  final String s = _TestUtil.randomUnicodeString(random(), 10);
  final int codePointCount = s.codePointCount(0, s.length());
  final int minGram = _TestUtil.nextInt(random(), 1, 3);
  final int maxGram = _TestUtil.nextInt(random(), minGram, 10);
  TokenStream tk = new KeywordTokenizer(new StringReader(s));
  tk = new NGramTokenFilter(TEST_VERSION_CURRENT, tk, minGram, maxGram);
  final CharTermAttribute termAtt = tk.addAttribute(CharTermAttribute.class);
  final OffsetAttribute offsetAtt = tk.addAttribute(OffsetAttribute.class);
  tk.reset();
  for (int start = 0; start < codePointCount; ++start) {
    for (int end = start + minGram; end <= Math.min(codePointCount, start + maxGram); ++end) {
      assertTrue(tk.incrementToken());
      assertEquals(0, offsetAtt.startOffset());
      assertEquals(s.length(), offsetAtt.endOffset());
      final int startIndex = Character.offsetByCodePoints(s, 0, start);
      final int endIndex = Character.offsetByCodePoints(s, 0, end);
      assertEquals(s.substring(startIndex, endIndex), termAtt.toString());
    }
  }
  assertFalse(tk.incrementToken());
}
Project: Maskana-Gestor-de-Conocimiento    File: TestWordDelimiterFilter.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      protectedWords = new CharArraySet(TEST_VERSION_CURRENT, new HashSet<String>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null;
    }

    Analyzer a = new Analyzer() { 
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new KeywordTokenizer(reader);
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    };
    // depending upon options, this thing may or may not preserve the empty term
    checkAnalysisConsistency(random, a, random.nextBoolean(), "");
  }
}
Project: Maskana-Gestor-de-Conocimiento    File: TestSnowballVocab.java
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the corresponding string in output.txt.
 */
private void assertCorrectOutput(final String snowballLanguage, String dataDirectory)
    throws IOException {
  if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);

  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
        Reader reader) {
      Tokenizer t = new KeywordTokenizer(reader);
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };

  assertVocabulary(a, getDataFile("TestSnowballVocabData.zip"), 
      dataDirectory + "/voc.txt", dataDirectory + "/output.txt");
}
Project: elasticsearch-analysis-metaphone_ptBR    File: MetaphoneTokenFilterTests.java
@Test
public void testMetaphoneWords() throws Exception {
    Index index = new Index("test", "_na_");
    Settings settings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.myStemmer.type", "br_metaphone")
            .build();

    AnalysisService analysisService = createAnalysisService(index, settings, new AnalysisMetaphonePlugin());

    TokenFilterFactory filterFactory = analysisService.tokenFilter("br_metaphone");

    Tokenizer tokenizer = new KeywordTokenizer();

    Map<String,String> words = buildWordList();

    Set<String> inputWords = words.keySet();
    for(String word : inputWords) {
        tokenizer.setReader(new StringReader(word));
        TokenStream ts = filterFactory.create(tokenizer);

        CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertThat(ts.incrementToken(), equalTo(true));
        assertThat(term1.toString(), equalTo(words.get(word)));
        ts.close();
    }
}
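
Note the reuse pattern in the loop above: one KeywordTokenizer instance is rewound with setReader() for every word. Lucene's TokenStream contract requires reset() before consuming and close() before the next setReader() call, which is why each iteration ends with ts.close().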
Project: elasticsearch-analysis-phonetic-eudex    File: EudexTokenizerFactory.java
@Inject
public EudexTokenizerFactory(Index index,
                             IndexSettingsService indexSettingsService,
                             @Assisted String name,
                             @Assisted Settings settings) {
    super(index, indexSettingsService.indexSettings(), name, settings);
    this.factory = new EudexAttributeFactory();
    this.bufferSize = settings.getAsInt("buffersize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
Project: elasticsearch-analysis-phonetic-eudex    File: EudexAnalyzerProvider.java
@Inject
public EudexAnalyzerProvider(Index index,
                             IndexSettingsService indexSettingsService,
                             @Assisted String name,
                             @Assisted Settings settings) {
    super(index, indexSettingsService.indexSettings(), name, settings);
    this.bufferSize = settings.getAsInt("buffersize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
Project: elasticsearch-icu    File: IcuCollationTokenizerFactory.java
@Inject
public IcuCollationTokenizerFactory(Index index,
                                    @IndexSettings Settings indexSettings,
                                    @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);
    Collator collator = IcuCollationKeyAnalyzerProvider.createCollator(settings);
    this.factory = new ICUCollationAttributeFactory(collator);
    this.bufferSize = settings.getAsInt("buffer_size", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
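
All three factories above read an optional buffer-size setting and fall back to KeywordTokenizer.DEFAULT_BUFFER_SIZE (256). The value only sizes the initial term buffer; KeywordTokenizer grows the buffer on demand, so longer inputs still come through intact. A minimal sketch of passing the value through directly (Lucene 5.x-era API; the input string is chosen for illustration):

// Explicit buffer size; equivalent to the factories' fallback behavior.
Tokenizer t = new KeywordTokenizer(KeywordTokenizer.DEFAULT_BUFFER_SIZE);
t.setReader(new StringReader("any single-token input"));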
Project: search    File: TestSmartChineseAnalyzer.java
public void testEmptyTerm() throws IOException {
  Random random = random();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new WordTokenFilter(tokenizer));
    }
  };
  checkAnalysisConsistency(random, a, random.nextBoolean(), "");
}
Project: search    File: TestJapaneseBaseFormFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestJapaneseReadingFormFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestJapaneseKatakanaStemFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestICUNormalizer2Filter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestICUTransformFilter.java
public void testOptimizer() throws Exception {
  String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
}
Project: search    File: TestICUTransformFilter.java
public void testOptimizerSurrogate() throws Exception {
  String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  new ICUTransformFilter(new KeywordTokenizer(new StringReader("")), custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
}
Project: search    File: TestICUTransformFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestICUFoldingFilter.java
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new KeywordTokenizer(reader);
      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
}
Project: search    File: TestICUCollationKeyFilterFactory.java
public void testBasicUsage() throws Exception {
  String turkishUpperCase = "I WİLL USE TURKİSH CASING";
  String turkishLowerCase = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "tr",
      "strength", "primary");
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(turkishUpperCase)));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(turkishLowerCase)));
  assertCollatesToSame(tsUpper, tsLower);
}
Project: search    File: TestICUCollationKeyFilterFactory.java
public void testNormalization() throws Exception {
  String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
  String turkishLowerCase = "ı will use turkish casıng";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "tr",
      "strength", "primary",
      "decomposition", "canonical");
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(turkishUpperCase)));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(turkishLowerCase)));
  assertCollatesToSame(tsUpper, tsLower);
}
Project: search    File: TestICUCollationKeyFilterFactory.java
public void testSecondaryStrength() throws Exception {
  String upperCase = "TESTING";
  String lowerCase = "testing";
  TokenFilterFactory factory = tokenFilterFactory("ICUCollationKey",
      "locale", "en",
      "strength", "secondary",
      "decomposition", "no");
  TokenStream tsUpper = factory.create(
      new KeywordTokenizer(new StringReader(upperCase)));
  TokenStream tsLower = factory.create(
      new KeywordTokenizer(new StringReader(lowerCase)));
  assertCollatesToSame(tsUpper, tsLower);
}