Java 类org.apache.lucene.analysis.ja.tokenattributes.BaseFormAttribute 实例源码

项目:easyjasub    文件:LuceneParser.java   
/**
 * Registers the token attributes this parser works with on the given stream.
 *
 * <p>Adds, in this order, the offset, reading, part-of-speech, inflection and
 * base-form attributes via {@code addAttribute}. The returned attribute
 * instances are discarded here.
 * NOTE(review): presumably they are looked up again later with
 * {@code getAttribute} - confirm against the callers.
 *
 * @param tokenStream the stream to register the attributes on
 */
private void addAttributes(TokenStream tokenStream) {
    tokenStream.addAttribute(OffsetAttribute.class);
    tokenStream.addAttribute(ReadingAttribute.class);
    tokenStream.addAttribute(PartOfSpeechAttribute.class);
    tokenStream.addAttribute(InflectionAttribute.class);
    tokenStream.addAttribute(BaseFormAttribute.class);
}
项目:easyjasub    文件:LuceneParser.java   
/**
 * Copies the current base form from the stream onto the given token.
 *
 * <p>Does nothing when the stream yields no {@link BaseFormAttribute}.
 * Otherwise the value of {@code getBaseForm()} is handed to the token
 * unchanged, whatever it is.
 *
 * @param tokenStream the stream to read the base-form attribute from
 * @param token       the token that receives the base form
 */
private void readBaseForm(TokenStream tokenStream, LuceneToken token) {
    BaseFormAttribute attribute = tokenStream.getAttribute(BaseFormAttribute.class);
    if (attribute == null) {
        // No base-form attribute available: leave the token untouched.
        return;
    }
    token.setBaseForm(attribute.getBaseForm());
}
项目:langpi    文件:JaSegmenter.java   
/**
 * Splits the given text into tokens with a {@link JapaneseTokenizer} running
 * in {@code SEARCH} mode.
 *
 * <p>Side effect: {@code JaStemmer.lemma} is cleared and then filled with one
 * entry (surface form to base form) for every token whose base form is not
 * {@code null}.
 *
 * <p>An {@link IOException} raised while tokenizing is printed and not
 * rethrown; the tokens collected up to that point are still returned.
 *
 * @param text the text to segment
 * @return the surface forms of the tokens, in the order the tokenizer emitted them
 */
@Override
public List<String> segmentWords(String text) {

    List<String> ret = new ArrayList<String>();

    StringReader textreader = new StringReader(text);
    JapaneseTokenizer segmenter =
            new JapaneseTokenizer(textreader, null, true, JapaneseTokenizer.Mode.SEARCH);

    JaStemmer.lemma.clear();
    CharTermAttribute termAtt = segmenter.getAttribute(CharTermAttribute.class);
    BaseFormAttribute baseAtt = segmenter.getAttribute(BaseFormAttribute.class);
    try {
        try {
            segmenter.reset();
            while (segmenter.incrementToken()) {
                // Snapshot both values once: the attribute instances are reused
                // and overwritten by the next incrementToken() call.
                String surface = termAtt.toString();
                String base = baseAtt.getBaseForm();
                ret.add(surface);
                if (base != null) {
                    JaStemmer.lemma.put(surface, base);
                }
            }
            // Complete the consumer workflow (reset, incrementToken*, end, close).
            segmenter.end();
        } finally {
            // Release the tokenizer even when reset()/incrementToken() throws.
            segmenter.close();
        }
    } catch (IOException e) {
        // Best effort: report the failure and return what was collected so far.
        e.printStackTrace();
    }

    return ret;
}