Java 类org.apache.lucene.analysis.util.WordlistLoader 实例源码

项目:search    文件:TestWordlistLoader.java   
/**
 * Loads a Snowball-format stopword list and checks the resulting word set.
 * The fixture mixes '|' comment lines, blank/whitespace-only lines, padded
 * words, several words on one line, and words followed by a trailing comment;
 * exactly seven words are expected to be loaded.
 */
public void testSnowballListLoading() throws IOException {
  final StringBuilder input = new StringBuilder();
  input.append("|comment\n");             // comment-only line
  input.append(" |comment\n");            // comment preceded by whitespace
  input.append("\n");                     // empty line
  input.append("  \t\n");                 // whitespace-only line
  input.append(" |comment | comment\n");  // comment containing another '|'
  input.append("ONE\n");                  // upper-case stopword
  input.append("   two   \n");            // stopword padded with spaces
  input.append(" three   four five \n");  // several stopwords on one line
  input.append("six seven | comment\n");  // stopwords followed by a comment

  final CharArraySet wordset =
      WordlistLoader.getSnowballWordSet(new StringReader(input.toString()));

  assertEquals(7, wordset.size());
  final String[] expectedWords = {"ONE", "two", "three", "four", "five", "six", "seven"};
  for (String word : expectedWords) {
    assertTrue(wordset.contains(word));
  }
}
项目:NYBC    文件:TestWordlistLoader.java   
/**
 * Loads a Snowball-format stopword list and checks the resulting word set.
 * The fixture mixes '|' comment lines, blank/whitespace-only lines, padded
 * words, several words on one line, and words followed by a trailing comment;
 * exactly seven words are expected to be loaded.
 */
public void testSnowballListLoading() throws IOException {
  final StringBuilder input = new StringBuilder();
  input.append("|comment\n");             // comment-only line
  input.append(" |comment\n");            // comment preceded by whitespace
  input.append("\n");                     // empty line
  input.append("  \t\n");                 // whitespace-only line
  input.append(" |comment | comment\n");  // comment containing another '|'
  input.append("ONE\n");                  // upper-case stopword
  input.append("   two   \n");            // stopword padded with spaces
  input.append(" three   four five \n");  // several stopwords on one line
  input.append("six seven | comment\n");  // stopwords followed by a comment

  final CharArraySet wordset =
      WordlistLoader.getSnowballWordSet(new StringReader(input.toString()), TEST_VERSION_CURRENT);

  assertEquals(7, wordset.size());
  final String[] expectedWords = {"ONE", "two", "three", "four", "five", "six", "seven"};
  for (String word : expectedWords) {
    assertTrue(wordset.contains(word));
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestWordlistLoader.java   
/**
 * Loads a Snowball-format stopword list and checks the resulting word set.
 * The fixture mixes '|' comment lines, blank/whitespace-only lines, padded
 * words, several words on one line, and words followed by a trailing comment;
 * exactly seven words are expected to be loaded.
 */
public void testSnowballListLoading() throws IOException {
  final StringBuilder input = new StringBuilder();
  input.append("|comment\n");             // comment-only line
  input.append(" |comment\n");            // comment preceded by whitespace
  input.append("\n");                     // empty line
  input.append("  \t\n");                 // whitespace-only line
  input.append(" |comment | comment\n");  // comment containing another '|'
  input.append("ONE\n");                  // upper-case stopword
  input.append("   two   \n");            // stopword padded with spaces
  input.append(" three   four five \n");  // several stopwords on one line
  input.append("six seven | comment\n");  // stopwords followed by a comment

  final CharArraySet wordset =
      WordlistLoader.getSnowballWordSet(new StringReader(input.toString()), TEST_VERSION_CURRENT);

  assertEquals(7, wordset.size());
  final String[] expectedWords = {"ONE", "two", "three", "four", "five", "six", "seven"};
  for (String word : expectedWords) {
    assertTrue(wordset.contains(word));
  }
}
项目:theSemProject    文件:MyAnalyzer.java   
/**
 * Returns the default stop word set for a language.
 *
 * <p>The language code is compared case-insensitively. Any code without a
 * dedicated branch below (including {@code null}) falls back to
 * {@link StandardAnalyzer#STOP_WORDS_SET}.
 *
 * @param language language code, e.g. {@code "en"}, {@code "it"}, {@code "de"}
 * @return the stop word set for the language
 * @throws RuntimeException if loading the stop word resource fails; the
 *         underlying exception is attached as the cause
 */
public static CharArraySet getDefaultStopSet(String language) {
    try {
        if ("en".equalsIgnoreCase(language)) {
            return StandardAnalyzer.STOP_WORDS_SET;
        } else if ("es".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "spanish_stop.txt", StandardCharsets.UTF_8));
        } else if ("fr".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "french_stop.txt", StandardCharsets.UTF_8));
        } else if ("de".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "german_stop.txt", StandardCharsets.UTF_8));
        } else if ("pl".equalsIgnoreCase(language)) {
            return WordlistLoader.getWordSet(IOUtils.getDecodingReader(PolishAnalyzer.class, "stopwords.txt", StandardCharsets.UTF_8), "#");
        } else if ("pt".equalsIgnoreCase(language) || "br".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "portuguese_stop.txt", StandardCharsets.UTF_8));
        } else if ("it".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "italian_stop.txt", StandardCharsets.UTF_8));
        } else if ("cz".equalsIgnoreCase(language) || "sk".equalsIgnoreCase(language)) {
            // NOTE(review): "sk" shares the Czech list, and the ISO 639-1 code "cs" is not matched here - confirm intent.
            return WordlistLoader.getWordSet(IOUtils.getDecodingReader(CzechAnalyzer.class, "stopwords.txt", StandardCharsets.UTF_8), "#");
        } else if ("tr".equalsIgnoreCase(language)) {
            return TurkishAnalyzer.loadStopwordSet(false, TurkishAnalyzer.class, "stopwords.txt", "#");
        } else if ("ru".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "russian_stop.txt", StandardCharsets.UTF_8));
        } else if ("ro".equalsIgnoreCase(language)) {
            return RomanianAnalyzer.loadStopwordSet(false, RomanianAnalyzer.class, "stopwords.txt", "#");
        } else if ("bg".equalsIgnoreCase(language)) {
            return BulgarianAnalyzer.loadStopwordSet(false, BulgarianAnalyzer.class, "stopwords.txt", "#");
        } else if ("nl".equalsIgnoreCase(language)) {
            return WordlistLoader.getSnowballWordSet(IOUtils.getDecodingReader(SnowballFilter.class, "dutch_stop.txt", StandardCharsets.UTF_8));
        }
    } catch (Exception e) {
        // Chain the original failure so the stack trace shows why loading failed.
        throw new RuntimeException("Unable to load default stopword set", e);
    }
    // No dedicated list for this language: use the standard English set.
    return StandardAnalyzer.STOP_WORDS_SET;
}
项目:search    文件:SmartChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SmartChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  CharArraySet stopWords = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(
          SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
      STOPWORD_FILE_COMMENT,
      Version.LATEST);
  // The set is exposed through the outer class, so hand out a read-only view.
  return CharArraySet.unmodifiableSet(stopWords);
}
项目:search    文件:TestWordlistLoader.java   
/**
 * Loads the same three-line word list through a plain reader and through a
 * buffered reader, passing each result to {@code checkSet}.
 */
public void testWordlistLoading() throws IOException {
  final String input = "ONE\n  two \nthree";

  // Plain StringReader.
  checkSet(WordlistLoader.getWordSet(new StringReader(input)));

  // Reader already wrapped in a BufferedReader.
  checkSet(WordlistLoader.getWordSet(new BufferedReader(new StringReader(input))));
}
项目:search    文件:TestWordlistLoader.java   
/**
 * Loads a word list using "#" as the comment marker and asserts that the
 * trailing "#comment" line contributes nothing to the set.
 */
public void testComments() throws Exception {
  final String input = "ONE\n  two \nthree\n#comment";
  final CharArraySet loaded = WordlistLoader.getWordSet(new StringReader(input), "#");
  checkSet(loaded);
  // Neither the raw comment line nor its text after the marker may be present.
  for (String excluded : new String[] {"#comment", "comment"}) {
    assertFalse(loaded.contains(excluded));
  }
}
项目:search    文件:SolrResourceLoader.java   
/**
 * Opens the named resource and reads its lines via
 * {@code WordlistLoader.getLines} using the given charset.
 *
 * @throws SolrException with {@code SERVER_ERROR} when a
 *         {@link CharacterCodingException} is raised while loading
 */
public List<String> getLines(String resource, Charset charset) throws IOException {
  final List<String> lines;
  try {
    lines = WordlistLoader.getLines(openResource(resource), charset);
  } catch (CharacterCodingException codingFailure) {
    final String message = "Error loading resource (wrong encoding?): " + resource;
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, codingFailure);
  }
  return lines;
}
项目:NYBC    文件:SmartChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SmartChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  CharArraySet stopWords = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(
          SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8),
      STOPWORD_FILE_COMMENT,
      Version.LUCENE_CURRENT);
  // The set is exposed through the outer class, so hand out a read-only view.
  return CharArraySet.unmodifiableSet(stopWords);
}
项目:NYBC    文件:TestWordlistLoader.java   
/**
 * Loads the same three-line word list through a plain reader and through a
 * buffered reader, passing each result to {@code checkSet}.
 */
public void testWordlistLoading() throws IOException {
  final String input = "ONE\n  two \nthree";

  // Plain StringReader.
  checkSet(WordlistLoader.getWordSet(new StringReader(input), TEST_VERSION_CURRENT));

  // Reader already wrapped in a BufferedReader.
  checkSet(WordlistLoader.getWordSet(
      new BufferedReader(new StringReader(input)), TEST_VERSION_CURRENT));
}
项目:NYBC    文件:TestWordlistLoader.java   
/**
 * Loads a word list using "#" as the comment marker and asserts that the
 * trailing "#comment" line contributes nothing to the set.
 */
public void testComments() throws Exception {
  final String input = "ONE\n  two \nthree\n#comment";
  final CharArraySet loaded =
      WordlistLoader.getWordSet(new StringReader(input), "#", TEST_VERSION_CURRENT);
  checkSet(loaded);
  // Neither the raw comment line nor its text after the marker may be present.
  for (String excluded : new String[] {"#comment", "comment"}) {
    assertFalse(loaded.contains(excluded));
  }
}
项目:NYBC    文件:SolrResourceLoader.java   
/**
 * Opens the named resource and reads its lines via
 * {@code WordlistLoader.getLines} using the given charset.
 *
 * @throws SolrException with {@code SERVER_ERROR} when a
 *         {@link CharacterCodingException} is raised while loading
 */
public List<String> getLines(String resource, Charset charset) throws IOException {
  final List<String> lines;
  try {
    lines = WordlistLoader.getLines(openResource(resource), charset);
  } catch (CharacterCodingException codingFailure) {
    final String message = "Error loading resource (wrong encoding?): " + resource;
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, codingFailure);
  }
  return lines;
}
项目:search-core    文件:SolrResourceLoader.java   
/**
 * Opens the named resource and reads its lines via
 * {@code WordlistLoader.getLines} using the given charset.
 *
 * @throws SolrException with {@code SERVER_ERROR} when a
 *         {@link CharacterCodingException} is raised while loading
 */
public List<String> getLines(String resource, Charset charset) throws IOException {
  final List<String> lines;
  try {
    lines = WordlistLoader.getLines(openResource(resource), charset);
  } catch (CharacterCodingException codingFailure) {
    final String message = "Error loading resource (wrong encoding?): " + resource;
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, codingFailure);
  }
  return lines;
}
项目:read-open-source-code    文件:SmartChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SmartChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  CharArraySet stopWords = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(
          SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8),
      STOPWORD_FILE_COMMENT,
      Version.LUCENE_CURRENT);
  // The set is exposed through the outer class, so hand out a read-only view.
  return CharArraySet.unmodifiableSet(stopWords);
}
项目:read-open-source-code    文件:SolrResourceLoader.java   
/**
 * Opens the named resource and reads its lines via
 * {@code WordlistLoader.getLines} using the given charset.
 *
 * @throws SolrException with {@code SERVER_ERROR} when a
 *         {@link CharacterCodingException} is raised while loading
 */
public List<String> getLines(String resource, Charset charset) throws IOException {
  final List<String> lines;
  try {
    lines = WordlistLoader.getLines(openResource(resource), charset);
  } catch (CharacterCodingException codingFailure) {
    final String message = "Error loading resource (wrong encoding?): " + resource;
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, codingFailure);
  }
  return lines;
}
项目:read-open-source-code    文件:SmartChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SmartChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  CharArraySet stopWords = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(
          SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8),
      STOPWORD_FILE_COMMENT,
      Version.LUCENE_CURRENT);
  // The set is exposed through the outer class, so hand out a read-only view.
  return CharArraySet.unmodifiableSet(stopWords);
}
项目:read-open-source-code    文件:SolrResourceLoader.java   
/**
 * Opens the named resource and reads its lines via
 * {@code WordlistLoader.getLines} using the given charset.
 *
 * @throws SolrException with {@code SERVER_ERROR} when a
 *         {@link CharacterCodingException} is raised while loading
 */
public List<String> getLines(String resource, Charset charset) throws IOException {
  final List<String> lines;
  try {
    lines = WordlistLoader.getLines(openResource(resource), charset);
  } catch (CharacterCodingException codingFailure) {
    final String message = "Error loading resource (wrong encoding?): " + resource;
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, message, codingFailure);
  }
  return lines;
}
项目:Maskana-Gestor-de-Conocimiento    文件:SmartChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SmartChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
  CharArraySet stopWords = WordlistLoader.getWordSet(
      IOUtils.getDecodingReader(
          SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, IOUtils.CHARSET_UTF_8),
      STOPWORD_FILE_COMMENT,
      Version.LUCENE_CURRENT);
  // The set is exposed through the outer class, so hand out a read-only view.
  return CharArraySet.unmodifiableSet(stopWords);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestWordlistLoader.java   
/**
 * Loads the same three-line word list through a plain reader and through a
 * buffered reader, passing each result to {@code checkSet}.
 */
public void testWordlistLoading() throws IOException {
  final String input = "ONE\n  two \nthree";

  // Plain StringReader.
  checkSet(WordlistLoader.getWordSet(new StringReader(input), TEST_VERSION_CURRENT));

  // Reader already wrapped in a BufferedReader.
  checkSet(WordlistLoader.getWordSet(
      new BufferedReader(new StringReader(input)), TEST_VERSION_CURRENT));
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestWordlistLoader.java   
/**
 * Loads a word list using "#" as the comment marker and asserts that the
 * trailing "#comment" line contributes nothing to the set.
 */
public void testComments() throws Exception {
  final String input = "ONE\n  two \nthree\n#comment";
  final CharArraySet loaded =
      WordlistLoader.getWordSet(new StringReader(input), "#", TEST_VERSION_CURRENT);
  checkSet(loaded);
  // Neither the raw comment line nor its text after the marker may be present.
  for (String excluded : new String[] {"#comment", "comment"}) {
    assertFalse(loaded.contains(excluded));
  }
}
项目:metka    文件:FinnishStopFilterFactory.java   
/**
 * Loads {@code stopwords.txt} (UTF-8, "#" as comment marker) from this class's
 * resources into {@code stopWords}.
 *
 * @throws RuntimeException if the resource cannot be read; the underlying
 *         {@link IOException} is attached as the cause
 */
protected void initStopWords() {
    try {
        stopWords = WordlistLoader.getWordSet(IOUtils.getDecodingReader(getClass(),
                "stopwords.txt", StandardCharsets.UTF_8), "#", getLuceneMatchVersion());
    } catch (IOException ex) {
        // Chain the I/O failure so the real reason is not lost in the stack trace.
        throw new RuntimeException("Unable to load default stopword set", ex);
    }
}
项目:maker    文件:SimpleChineseAnalyzer.java   
/**
 * Reads the default stopword file, decoded as UTF-8, from the resources of
 * {@code SimpleChineseAnalyzer} and returns it as an unmodifiable set.
 */
static CharArraySet loadDefaultStopWordSet() throws IOException {
    CharArraySet stopWords = WordlistLoader.getWordSet(
            IOUtils.getDecodingReader(
                    SimpleChineseAnalyzer.class, DEFAULT_STOPWORD_FILE, StandardCharsets.UTF_8),
            STOPWORD_FILE_COMMENT);
    // The set is exposed through the outer class, so hand out a read-only view.
    return CharArraySet.unmodifiableSet(stopWords);
}
项目:auto-phrase-tokenfilter    文件:AutoPhrasingQParserPlugin.java   
/**
 * Opens {@code resource} through the given loader and returns its lines as
 * read by {@code WordlistLoader.getLines} with UTF-8 decoding.
 */
private List<String> getLines(ResourceLoader loader, String resource) throws IOException {
  List<String> lines =
      WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
  return lines;
}