Java 类org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter 实例源码

项目:elasticsearch-analysis-opennlp    文件:PayloadsFilterTest.java   
/**
 * Runs a whitespace-tokenized, part-of-speech annotated sentence through
 * {@link DelimitedPayloadTokenFilter} (default delimiter, {@link IdentityEncoder}) and checks
 * every emitted token: the term must be the text before the delimiter and the payload must be
 * the bytes after it, or {@code null} when the token carries no delimiter.
 *
 * @throws Exception if the token stream fails or an assertion helper throws
 */
@Test
public void testDelimitedPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    // try-with-resources completes the TokenStream workflow (reset -> incrementToken -> end -> close)
    // and releases the underlying reader even when an assertion fails midway.
    try (DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(mockTokenizer,
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder())) {
        filter.reset();
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
        PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);

        assertTermEquals("The", filter, termAtt, payAtt, null);
        assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));
        assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes("UTF-8"));
        assertTermEquals("over", filter, termAtt, payAtt, null);
        assertTermEquals("the", filter, termAtt, payAtt, null);
        assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes("UTF-8"));

        // The stream must be exhausted before end() is legal.
        assertFalse(filter.incrementToken());
        filter.end();
    }
}
项目:elasticsearch-analysis-opennlp    文件:PayloadsFilterTest.java   
/**
 * Checks {@code FilterPayloadsFilter} when constructed with {@code true} as its last argument:
 * of the annotated sentence, only the tokens whose payload is one of the listed values
 * ("VB", "NN") are expected to come through, in their original order.
 *
 * @throws Exception if the token stream fails or an assertion helper throws
 */
@Test
public void testKeepPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(mockTokenizer,
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    // One charset constant for every byte conversion, instead of mixing it with "UTF-8" lookups.
    byte[] verb = "VB".getBytes(StandardCharsets.UTF_8);
    byte[] noun = "NN".getBytes(StandardCharsets.UTF_8);
    byte[][] payloads = {
            verb,
            noun
    };
    // try-with-resources completes the TokenStream workflow (reset -> incrementToken -> end -> close)
    // and closes the whole filter chain even when an assertion fails midway.
    try (FilterPayloadsFilter filter = new FilterPayloadsFilter(baseFilter, payloads, true)) {
        filter.reset();
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
        PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
        assertTermEquals("fox", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
        assertTermEquals("jumped", filter, termAtt, payAtt, "VB".getBytes(StandardCharsets.UTF_8));
        assertTermEquals("dogs", filter, termAtt, payAtt, "NN".getBytes(StandardCharsets.UTF_8));
        // The stream must be exhausted before end() is legal.
        assertFalse(filter.incrementToken());
        filter.end();
    }
}
项目:elasticsearch-analysis-opennlp    文件:PayloadsFilterTest.java   
/**
 * Checks {@code FilterPayloadsFilter} when constructed with {@code false} as its last argument:
 * tokens whose payload is one of the listed values ("VB", "NN") are expected to be dropped,
 * while tokens with other payloads or no payload at all pass through in order.
 *
 * @throws Exception if the token stream fails or an assertion helper throws
 */
@Test
public void testFilterPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(mockTokenizer,
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    byte[][] payloads = {
            "VB".getBytes("UTF-8"),
            "NN".getBytes("UTF-8")
    };
    // try-with-resources completes the TokenStream workflow (reset -> incrementToken -> end -> close)
    // and closes the whole filter chain even when an assertion fails midway.
    try (FilterPayloadsFilter filter = new FilterPayloadsFilter(baseFilter, payloads, false)) {
        filter.reset();
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
        PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
        assertTermEquals("The", filter, termAtt, payAtt, null);
        assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("red", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("over", filter, termAtt, payAtt, null);
        assertTermEquals("the", filter, termAtt, payAtt, null);
        assertTermEquals("lazy", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        assertTermEquals("brown", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
        // The stream must be exhausted before end() is legal.
        assertFalse(filter.incrementToken());
        filter.end();
    }
}
项目:elasticsearch-analysis-opennlp    文件:PayloadsFilterTest.java   
/**
 * Feeds the annotated sentence through {@link DelimitedPayloadTokenFilter} and then
 * {@code StripPayloadsTokenFilter}, and checks via {@code assertTermPayload} that all ten
 * terms are still emitted in order.
 * NOTE(review): {@code assertTermPayload} is defined outside this snippet; presumably it
 * asserts that the payload has been removed — confirm against the helper.
 *
 * @throws Exception if the token stream fails or an assertion helper throws
 */
@Test
public void testStripPayloads() throws Exception {
    String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
    MockTokenizer mockTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    mockTokenizer.setReader(new StringReader(test));
    DelimitedPayloadTokenFilter baseFilter = new DelimitedPayloadTokenFilter(mockTokenizer,
            DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
    // try-with-resources completes the TokenStream workflow (reset -> incrementToken -> end -> close)
    // and closes the whole filter chain even when an assertion fails midway.
    try (StripPayloadsTokenFilter filter = new StripPayloadsTokenFilter(baseFilter)) {
        filter.reset();
        CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
        PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
        assertTermPayload("The", filter, termAtt, payAtt);
        assertTermPayload("quick", filter, termAtt, payAtt);
        assertTermPayload("red", filter, termAtt, payAtt);
        assertTermPayload("fox", filter, termAtt, payAtt);
        assertTermPayload("jumped", filter, termAtt, payAtt);
        assertTermPayload("over", filter, termAtt, payAtt);
        assertTermPayload("the", filter, termAtt, payAtt);
        assertTermPayload("lazy", filter, termAtt, payAtt);
        assertTermPayload("brown", filter, termAtt, payAtt);
        assertTermPayload("dogs", filter, termAtt, payAtt);
        // The stream must be exhausted before end() is legal.
        assertFalse(filter.incrementToken());
        filter.end();
    }
}
项目:NYBC    文件:TestDelimitedPayloadTokenFilterFactory.java   
/**
 * Configures {@link DelimitedPayloadTokenFilterFactory} with the {@code "float"} encoder and
 * checks that each of the three tokens in {@code "the|0.1 quick|0.1 red|0.1"} carries a
 * payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if factory setup or the token stream fails
 */
public void testEncoder() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();
  // Count tokens so the test cannot pass vacuously when the filter emits nothing.
  int tokenCount = 0;
  while (tf.incrementToken()){
    tokenCount++;
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    // Check the payload itself before dereferencing it; a missing payload should be an
    // assertion failure rather than a NullPointerException.
    assertTrue("payload is null and it shouldn't be", payAttr.getPayload() != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    // Decode at the payload's own offset: the backing array is not guaranteed to start at 0.
    float payFloat = PayloadHelper.decodeFloat(payData, payAttr.getPayload().offset);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }
  assertTrue("expected 3 tokens but saw " + tokenCount, tokenCount == 3);
  // Finish the TokenStream workflow: end() after exhaustion, then close() to release the reader.
  tf.end();
  tf.close();
}
项目:NYBC    文件:TestDelimitedPayloadTokenFilterFactory.java   
/**
 * Configures {@link DelimitedPayloadTokenFilterFactory} with {@link FloatEncoder} (given by
 * class name) and {@code "*"} as the delimiter, then checks that each of the three tokens in
 * {@code "the*0.1 quick*0.1 red*0.1"} carries a payload that decodes to {@code 0.1f}.
 *
 * @throws Exception if factory setup or the token stream fails
 */
public void testDelim() throws Exception {
  Map<String,String> args = new HashMap<String, String>();
  args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, FloatEncoder.class.getName());
  args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
  DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
  factory.init(args);
  ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
  factory.inform(loader);

  TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);
  DelimitedPayloadTokenFilter tf = factory.create(input);
  tf.reset();
  // Count tokens so the test cannot pass vacuously when the filter emits nothing.
  int tokenCount = 0;
  while (tf.incrementToken()){
    tokenCount++;
    PayloadAttribute payAttr = tf.getAttribute(PayloadAttribute.class);
    assertTrue("payAttr is null and it shouldn't be", payAttr != null);
    // Check the payload itself before dereferencing it; a missing payload should be an
    // assertion failure rather than a NullPointerException.
    assertTrue("payload is null and it shouldn't be", payAttr.getPayload() != null);
    byte[] payData = payAttr.getPayload().bytes;
    assertTrue("payData is null and it shouldn't be", payData != null);
    // Decode at the payload's own offset: the backing array is not guaranteed to start at 0.
    float payFloat = PayloadHelper.decodeFloat(payData, payAttr.getPayload().offset);
    assertTrue(payFloat + " does not equal: " + 0.1f, payFloat == 0.1f);
  }
  assertTrue("expected 3 tokens but saw " + tokenCount, tokenCount == 3);
  // Finish the TokenStream workflow: end() after exhaustion, then close() to release the reader.
  tf.end();
  tf.close();
}
项目:elasticsearch_my    文件:DelimitedPayloadTokenFilterFactory.java   
/**
 * Wraps the given stream in a {@link DelimitedPayloadTokenFilter} built with this factory's
 * {@code delimiter} and {@code encoder}.
 *
 * @param tokenStream the upstream token stream to wrap
 * @return the newly created filter
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    return new DelimitedPayloadTokenFilter(tokenStream, delimiter, encoder);
}
项目:NYBC    文件:DelimitedPayloadTokenFilterFactory.java   
/**
 * Builds a {@link DelimitedPayloadTokenFilter} over {@code input} using this factory's
 * {@code delimiter} and {@code encoder}.
 *
 * @param input the upstream token stream to wrap
 * @return the newly created filter
 */
@Override
public DelimitedPayloadTokenFilter create(TokenStream input) {
  final DelimitedPayloadTokenFilter payloadFilter =
      new DelimitedPayloadTokenFilter(input, delimiter, encoder);
  return payloadFilter;
}