Java Class org.apache.lucene.analysis.tokenattributes.TypeAttribute Example Source Code
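The snippets below show how various open-source projects obtain and read TypeAttribute, which exposes the lexical type (e.g. "word", "<NUM>") of the current token in a Lucene TokenStream. As a minimal standalone sketch of the pattern they share (the analyzer, field name, and input text are illustrative placeholders, not taken from any project below):

// Minimal sketch: iterate a TokenStream and read each token's TypeAttribute.
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public class TypeAttributeDemo {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("field", "42 tokens")) {
            CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
            TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
            stream.reset();                      // mandatory before the first incrementToken()
            while (stream.incrementToken()) {
                // StandardTokenizer assigns types such as "<NUM>" for "42", "<ALPHANUM>" for "tokens"
                System.out.println(termAtt + " / " + typeAtt.type());
            }
            stream.end();                        // finalize offset state
        }
    }
}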

Project: elasticsearch-analysis-openkoreantext    File: TokenStreamAssertions.java
public static void assertTokenStream(TokenStream tokenStream, String[] expectedCharTerms, String[] expectedTypes, int[] expectedStartOffsets, int[] expectedEndOffsets) throws IOException {
    tokenStream.reset();
    int index = 0;
    while (tokenStream.incrementToken()) {
        assertEquals(expectedCharTerms[index], tokenStream.getAttribute(CharTermAttribute.class).toString());

        if(expectedTypes != null) {
            assertEquals(expectedTypes[index], tokenStream.getAttribute(TypeAttribute.class).type());
        }

        OffsetAttribute offsets = tokenStream.getAttribute(OffsetAttribute.class);

        if(expectedStartOffsets != null) {
            assertEquals(expectedStartOffsets[index], offsets.startOffset());
        }

        if(expectedEndOffsets != null) {
            assertEquals(expectedEndOffsets[index], offsets.endOffset());
        }

        index++;
    }
    tokenStream.end();
}
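A hypothetical invocation of this helper (the analyzer, terms, token types, and offsets are assumed for illustration, not taken from the project's tests):

// Illustrative only: expected values below are assumptions, not real test fixtures.
TokenStream ts = analyzer.tokenStream("field", "한국어 텍스트");
TokenStreamAssertions.assertTokenStream(ts,
        new String[]{"한국어", "텍스트"},  // expected terms
        new String[]{"Noun", "Noun"},      // expected token types (assumed)
        new int[]{0, 4},                   // expected start offsets
        new int[]{3, 7});                  // expected end offsets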
Project: lams    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: elasticsearch-analysis-ltp    File: LTPTokenizer.java
/**
 * Lucene constructor
 *
 * @throws UnirestException
 * @throws JSONException
 * @throws IOException
 */
public LTPTokenizer(Set<String> filter)
        throws IOException, JSONException, UnirestException {
    super();
    logger.info("LTPTokenizer Initialize......");
    // Add token offset attribute
    offsetAttr = addAttribute(OffsetAttribute.class);
    // Add token content attribute
    charTermAttr = addAttribute(CharTermAttribute.class);
    // Add token type attribute
    typeAttr = addAttribute(TypeAttribute.class);
    // Add token position attribute
    piAttr = addAttribute(PositionIncrementAttribute.class);
    // Create a new word segmenter to get tokens
    LTPSeg = new LTPWordSegmenter(input);
    // Add filter words set
    this.filter = filter;
}
Project: fastcatsearch3    File: Token.java
@Override
public void copyTo(AttributeImpl target) {
  if (target instanceof Token) {
    final Token to = (Token) target;
    to.reinit(this);
    // reinit shares the payload, so clone it:
    if (payload !=null) {
      to.payload = payload.clone();
    }
  } else {
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
    ((FlagsAttribute) target).setFlags(flags);
    ((TypeAttribute) target).setType(type);
  }
}
Project: solrplugins    File: CaseInsensitiveSortingTextField.java
@Override
public BytesRef normalizeQueryTarget(String val, boolean strict, String fieldName, boolean appendExtraDelim) throws IOException {
  TokenStream ts = getQueryAnalyzer().tokenStream(fieldName, val);
  try {
    ts.reset();
    CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.getAttribute(TypeAttribute.class);
    String matchType = strict ? INDEXED_TOKEN_TYPE : NORMALIZED_TOKEN_TYPE;
    while (ts.incrementToken()) {
      if (matchType.equals(typeAtt.type())) {
        BytesRefBuilder ret = new BytesRefBuilder();
        ret.copyChars(termAtt.toString());
        if (!strict || appendExtraDelim) {
          ret.append(delimBytes, 0, delimBytes.length);
        }
        return ret.get();
      }
    }
    return new BytesRef(BytesRef.EMPTY_BYTES);
  } finally {
    ts.close();
  }
}
Project: solrplugins    File: TokenTypeJoinFilterTest.java
/** verify that payload gets picked up for 1st group of tokens */
public void testTypeForPayload1() throws IOException {
  TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(new TokenArrayTokenizer(tokensWithPayloads), new String[] {"normalized", "filing", "prefix"},
          "joined", "normalized", "!", false, false);
  CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
  ttjf.reset();

  assertTrue(ttjf.incrementToken());

  assertEquals("unconsoled!Unconsoled!The ", termAtt.toString());
  assertEquals("joined", typeAtt.type());
  assertEquals("payload1", payloadAtt.getPayload().utf8ToString());

  assertTrue(ttjf.incrementToken());

  assertEquals("room with a view!Room With A View!A ", termAtt.toString());
  assertEquals("joined", typeAtt.type());
  assertNull(payloadAtt.getPayload());

  assertFalse(ttjf.incrementToken());
}
Project: solrplugins    File: TokenTypeJoinFilterTest.java
/** verify that payload gets picked up for 2nd group of tokens */
public void testTypeForPayload2() throws IOException {
  TokenTypeJoinFilter ttjf = new TokenTypeJoinFilter(new TokenArrayTokenizer(tokensWithPayloads), new String[] {"normalized", "filing", "prefix"},
          "joined", "filing", "!", false, false);
  CharTermAttribute termAtt = ttjf.getAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = ttjf.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = ttjf.getAttribute(PayloadAttribute.class);
  ttjf.reset();

  assertTrue(ttjf.incrementToken());

  assertEquals("unconsoled!Unconsoled!The ", termAtt.toString());
  assertEquals("joined", typeAtt.type());
  assertNull(payloadAtt.getPayload());

  assertTrue(ttjf.incrementToken());

  assertEquals("room with a view!Room With A View!A ", termAtt.toString());
  assertEquals("joined", typeAtt.type());
  assertEquals("payload2", payloadAtt.getPayload().utf8ToString());

  assertFalse(ttjf.incrementToken());
}
Project: solrplugins    File: JsonReferencePayloadTokenizerTest.java
@Test
public void testShorthand2() throws IOException {
  JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
  tokenizer.setReader(new StringReader("{\"filing\": \"something\", \"prefix\": \"The \"}"));
  tokenizer.reset();

  assertTrue(tokenizer.incrementToken());
  assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertTrue(tokenizer.incrementToken());
  assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertFalse(tokenizer.incrementToken());
}
Project: solrplugins    File: JsonReferencePayloadTokenizerTest.java
@Test
public void testShorthand3() throws IOException {
  JsonReferencePayloadTokenizer tokenizer = new JsonReferencePayloadTokenizer();
  tokenizer.setReader(new StringReader("{\"prefix\": \"The \", \"filing\": \"something\"}"));
  tokenizer.reset();

  assertTrue(tokenizer.incrementToken());
  assertEquals("something", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_FILING, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(1, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertTrue(tokenizer.incrementToken());
  assertEquals("The ", tokenizer.getAttribute(CharTermAttribute.class).toString());
  assertEquals(JsonReferencePayloadTokenizer.TYPE_PREFIX, tokenizer.getAttribute(TypeAttribute.class).type());
  assertEquals(0, tokenizer.getAttribute(PositionIncrementAttribute.class).getPositionIncrement());
  assertNull(tokenizer.getAttribute(PayloadAttribute.class).getPayload());

  assertFalse(tokenizer.incrementToken());
}
Project: Alix    File: Demo.java
public static MyToken[] tokensFromAnalysis(Analyzer analyzer, String text, String field) throws IOException
{
  TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
  CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute positionIncrementAttr = stream.addAttribute(PositionIncrementAttribute.class);
  TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);
  OffsetAttribute offsetAttr = stream.addAttribute(OffsetAttribute.class);

  stream.reset(); // required before the first call to incrementToken()
  ArrayList<MyToken> tokenList = new ArrayList<MyToken>();
  while (stream.incrementToken()) {
    tokenList.add(new MyToken(term.toString(), positionIncrementAttr.getPositionIncrement(), typeAttr.type(),
        offsetAttr.startOffset(), offsetAttr.endOffset()));
  }
  stream.end();
  stream.close();

  return tokenList.toArray(new MyToken[0]);
}
Project: hanlp-lucene-plugin    File: HanLPAnalyzerTest.java
public void testCreateComponents() throws Exception
{
    String text = "中华人民共和国很辽阔";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech (token type)
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: hanlp-lucene-plugin    File: HanLPAnalyzerTest.java
public void testIssue() throws Exception
{
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    args.put("enableNormalization", "true");
    HanLPTokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create();
    String text = "會辦台星保證最低價的原因?";

    tokenizer.setReader(new StringReader(text));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // part of speech (token type)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: hanlp-lucene-plugin    File: HanLPIndexAnalyzerTest.java
public void testCreateComponents() throws Exception
{
    String text = "中华人民共和国很辽阔";
    for (int i = 0; i < text.length(); ++i)
    {
        System.out.print(text.charAt(i) + "" + i + " ");
    }
    System.out.println();
    Analyzer analyzer = new HanLPIndexAnalyzer();
    TokenStream tokenStream = analyzer.tokenStream("field", text);
    tokenStream.reset();
    while (tokenStream.incrementToken())
    {
        CharTermAttribute attribute = tokenStream.getAttribute(CharTermAttribute.class);
        // offset
        OffsetAttribute offsetAtt = tokenStream.getAttribute(OffsetAttribute.class);
        // position increment
        PositionIncrementAttribute positionAttr = tokenStream.getAttribute(PositionIncrementAttribute.class);
        // part of speech (token type)
        TypeAttribute typeAttr = tokenStream.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
Project: elasticsearch-analysis-url    File: URLTokenFilter.java
/**
 * Tokenize the given input using a {@link URLTokenizer}. Settings which have been set on this {@link URLTokenFilter}
 * will be passed along to the tokenizer.
 * @param input a string to be tokenized
 * @return a list of tokens extracted from the input string
 * @throws IOException
 */
private List<Token> tokenize(String input) throws IOException {
    List<Token> tokens = new ArrayList<>();
    URLTokenizer tokenizer = new URLTokenizer();
    // create a copy of the parts list to avoid ConcurrentModificationException when sorting
    tokenizer.setParts(new ArrayList<>(parts));
    tokenizer.setUrlDecode(urlDeocde);
    tokenizer.setTokenizeHost(tokenizeHost);
    tokenizer.setTokenizePath(tokenizePath);
    tokenizer.setTokenizeQuery(tokenizeQuery);
    tokenizer.setAllowMalformed(allowMalformed || passthrough);
    tokenizer.setTokenizeMalformed(tokenizeMalformed);
    tokenizer.setReader(new StringReader(input));
    tokenizer.reset();

    String term;
    URLPart part;
    OffsetAttribute offset;
    while (tokenizer.incrementToken()) {
        term = tokenizer.getAttribute(CharTermAttribute.class).toString();
        part = URLPart.fromString(tokenizer.getAttribute(TypeAttribute.class).type());
        offset = tokenizer.getAttribute(OffsetAttribute.class);
        tokens.add(new Token(term, part, offset.startOffset(), offset.endOffset()));
    }
    return tokens;
}
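A hypothetical caller of this private helper (illustrative only; the filter invokes it internally for each incoming term):

// Illustrative only: not part of the project source.
List<Token> tokens = tokenize("https://example.com/a/b?q=1");
for (Token token : tokens) {
    // each Token carries the term text, the URL part parsed from TypeAttribute, and offsets
    System.out.println(token);
}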
Project: elasticsearch-twitter-korean    File: AnalyzerTestUtil.java
protected List<TestToken> collectExtractedNouns(TokenStream stream) throws IOException {
    CharTermAttribute charTermAtt = stream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offSetAtt = stream.addAttribute(OffsetAttribute.class);
    TypeAttribute typeAttr = stream.addAttribute(TypeAttribute.class);

    List<TestToken> extractedTokens = Lists.newArrayList();

    while(stream.incrementToken()) {
        TestToken t = getToken(charTermAtt.toString(), offSetAtt.startOffset(), offSetAtt.endOffset());

        System.out.println("termAtt.term() : " + charTermAtt.toString());
        System.out.println("startoffSetAtt : " + offSetAtt.startOffset());
        System.out.println("endoffSetAtt : " + offSetAtt.endOffset());
        System.out.println("typeAttr : " + typeAttr.toString());

        extractedTokens.add(t);
    }

    return extractedTokens;
}
Project: search    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: search    File: TestSnowball.java
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

  filter.incrementToken();

  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
Project: search    File: TestNumericTokenStream.java
public void testLongStream() throws Exception {
  final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  assertNotNull(numericAtt);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    bytesAtt.fillBytesRef();
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
Project: search    File: TestNumericTokenStream.java
public void testIntStream() throws Exception {
  final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  assertNotNull(numericAtt);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    bytesAtt.fillBytesRef();
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
Project: search    File: SpellingQueryConverter.java
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {      
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), 
                    offset + offsetAtt.endOffset());
    token.setFlags(flagsAttValue); //overwriting any flags already set...
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
Project: solr-multilingual-analyzer    File: MultiLangTokenizer.java
private void handleTokenStream(Map<Integer, List<Token>> tokenPosMap, TokenStream tokenStream) throws IOException {
    tokenStream.reset();
    int pos = 0;

    CharTermAttribute charTermAttribute = getCharTermAttribute(tokenStream);
    OffsetAttribute offsetAttribute = getOffsetAttribute(tokenStream);
    TypeAttribute typeAttribute = getTypeAttribute(tokenStream);
    PositionIncrementAttribute positionIncrementAttribute = getPositionIncrementAttribute(tokenStream);

    while (tokenStream.incrementToken()) {
        if (null == charTermAttribute || null == offsetAttribute) {
            return;
        }
        Token token = new Token(charTermAttribute.buffer(), 0, charTermAttribute.length(),
                offsetAttribute.startOffset(), offsetAttribute.endOffset());
        if (null != typeAttribute) {
            token.setType(typeAttribute.type());
        }
        pos += null != positionIncrementAttribute ? positionIncrementAttribute.getPositionIncrement() : 1;
        if (!tokenPosMap.containsKey(pos)) {
            tokenPosMap.put(pos, new LinkedList<Token>());
        }
        tokenPosMap.get(pos).add(token);
    }
    tokenStream.close();
}
Project: community-edition-old    File: PathTokenFilterTest.java
public void testAttributesAfterStreamEnd() throws IOException
{
    final String path = "uri1:one";
    StringReader reader = new StringReader(path);
    PathTokenFilter ts = new PathTokenFilter(reader, PathTokenFilter.PATH_SEPARATOR,
            PathTokenFilter.SEPARATOR_TOKEN_TEXT, PathTokenFilter.NO_NS_TOKEN_TEXT,
            PathTokenFilter.NAMESPACE_START_DELIMITER, PathTokenFilter.NAMESPACE_END_DELIMITER, true);

    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);

    // PathTokenFilter.end() will be called after all tokens consumed.
    tokenise(ts, new String[]{"uri1", "one"});

    // Check attributes cleaned up
    assertEquals("", termAtt.toString());
    assertEquals("word", typeAtt.type()); // the default
    assertEquals(0, posIncAtt.getPositionIncrement());
    // Final offset...
    assertEquals(path.length(), offsetAtt.startOffset());
    assertEquals(path.length(), offsetAtt.endOffset());
}
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testFull() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 4);
}
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testFullWithNoChineseOut() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer, false, 1, false);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 3);
}
Project: pinyinTokenFilter    File: TestPinyinTransformTokenFilter.java
@Test
public void testShort() throws IOException {
    this.filter = new PinyinTransformTokenFilter(tokenizer, true);
    this.filter.reset();
    int position = 0;
    while (this.filter.incrementToken()) {
        CharTermAttribute termAtt = this.filter.getAttribute(CharTermAttribute.class);
        String token = termAtt.toString();
        int increment = this.filter.getAttribute(PositionIncrementAttribute.class).getPositionIncrement();
        position += increment;
        OffsetAttribute offset = this.filter.getAttribute(OffsetAttribute.class);
        TypeAttribute type = this.filter.getAttribute(TypeAttribute.class);
        System.out.println(position + "[" + offset.startOffset() + "," + offset.endOffset() + "} (" + type
                .type() + ") " + token);
    }
    assertTrue(position == 4);
}
Project: lucene-korean    File: KoreanAnalyzerTest.java
public void testStandardTokenizer() throws Exception {

        // (an earlier test string, immediately overwritten by the line below)
        // String source = "우리나라라면에서부터 일본라면이 파생되었잖니?";
        String source = "너는 너는 다시 내게 돌아 올거야. school is a good place 呵呵大笑 呵呵大笑";

        long start = System.currentTimeMillis();

        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
        TokenStream tok = new StandardFilter(Version.LUCENE_36, stream);

        tok.reset(); // required before the first call to incrementToken()
        while (tok.incrementToken()) {
            CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
            OffsetAttribute offAttr = stream.getAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posAttr = stream.getAttribute(PositionIncrementAttribute.class);
            TypeAttribute typeAttr = stream.getAttribute(TypeAttribute.class);

            System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
        }

        System.out.println((System.currentTimeMillis() - start) + "ms");
    }
Project: lucene-korean    File: KoreanAnalyzerTest.java
public void testHanjaConvert() throws Exception {

        String source = "呵呵大笑  落落長松 ";

        long start = System.currentTimeMillis();

        KoreanAnalyzer analyzer = new KoreanAnalyzer();
        TokenStream stream = analyzer.tokenStream("s", new StringReader(source));
        TokenStream tok = new KoreanFilter(stream);

        tok.reset(); // required before the first call to incrementToken()
        while (tok.incrementToken()) {
            CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
            OffsetAttribute offAttr = stream.getAttribute(OffsetAttribute.class);
            PositionIncrementAttribute posAttr = stream.getAttribute(PositionIncrementAttribute.class);
            TypeAttribute typeAttr = stream.getAttribute(TypeAttribute.class);

            System.out.println(new String(termAttr.buffer(), 0, termAttr.length()));
        }

        System.out.println((System.currentTimeMillis() - start) + "ms");
    }
Project: NYBC    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: NYBC    File: TestSnowball.java
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

  filter.incrementToken();

  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
Project: NYBC    File: Token.java
@Override
public void copyTo(AttributeImpl target) {
  if (target instanceof Token) {
    final Token to = (Token) target;
    to.reinit(this);
    // reinit shares the payload, so clone it:
    if (payload !=null) {
      to.payload = payload.clone();
    }
  } else {
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
    ((FlagsAttribute) target).setFlags(flags);
    ((TypeAttribute) target).setType(type);
  }
}
Project: NYBC    File: TestNumericTokenStream.java
public void testLongStream() throws Exception {
  final NumericTokenStream stream=new NumericTokenStream().setLongValue(lvalue);
  // use getAttribute to test that the attributes really exist; if not, an IAE will be thrown
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift=0; shift<64; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    final int hash = bytesAtt.fillBytesRef();
    assertEquals("Hash incorrect", bytes.hashCode(), hash);
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
Project: NYBC    File: TestNumericTokenStream.java
public void testIntStream() throws Exception {
  final NumericTokenStream stream=new NumericTokenStream().setIntValue(ivalue);
  // use getAttribute to test that the attributes really exist; if not, an IAE will be thrown
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift=0; shift<32; shift+=NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    final int hash = bytesAtt.fillBytesRef();
    assertEquals("Hash incorrect", bytes.hashCode(), hash);
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
Project: NYBC    File: SpellingQueryConverter.java
protected void analyze(Collection<Token> result, Reader text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {      
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), 
                    offset + offsetAtt.endOffset());
    token.setFlags(flagsAttValue); //overwriting any flags already set...
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
Project: search-core    File: SpellingQueryConverter.java
protected void analyze(Collection<Token> result, Reader text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {      
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), 
                    offset + offsetAtt.endOffset());
    token.setFlags(flagsAttValue); //overwriting any flags already set...
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}
Project: read-open-source-code    File: PrefixAwareTokenFilter.java
public PrefixAwareTokenFilter(TokenStream prefix, TokenStream suffix) {
  super(suffix);
  this.suffix = suffix;
  this.prefix = prefix;
  prefixExhausted = false;

  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
  offsetAtt = addAttribute(OffsetAttribute.class);
  typeAtt = addAttribute(TypeAttribute.class);
  flagsAtt = addAttribute(FlagsAttribute.class);

  p_termAtt = prefix.addAttribute(CharTermAttribute.class);
  p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
  p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
  p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
  p_typeAtt = prefix.addAttribute(TypeAttribute.class);
  p_flagsAtt = prefix.addAttribute(FlagsAttribute.class);
}
Project: read-open-source-code    File: Token.java
@Override
public void copyTo(AttributeImpl target) {
  if (target instanceof Token) {
    final Token to = (Token) target;
    to.reinit(this);
    // reinit shares the payload, so clone it:
    if (payload !=null) {
      to.payload = payload.clone();
    }
  } else {
    super.copyTo(target);
    ((OffsetAttribute) target).setOffset(startOffset, endOffset);
    ((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
    ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
    ((FlagsAttribute) target).setFlags(flags);
    ((TypeAttribute) target).setType(type);
  }
}
Project: read-open-source-code    File: SpellingQueryConverter.java
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  TokenStream stream = analyzer.tokenStream("", text);
  // TODO: support custom attributes
  CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
  TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {      
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    token.setOffset(offset + offsetAtt.startOffset(), 
                    offset + offsetAtt.endOffset());
    token.setFlags(flagsAttValue); //overwriting any flags already set...
    token.setType(typeAtt.type());
    token.setPayload(payloadAtt.getPayload());
    token.setPositionIncrement(posIncAtt.getPositionIncrement());
    result.add(token);
  }
  stream.end();
  stream.close();
}