@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  } catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index", ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
  if (analyzerIn == null) analyzerIn = getAnalyzer();
  TokenStream source = null;
  try {
    source = analyzerIn.tokenStream(field, part);
    source.reset();
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    if (!source.incrementToken())
      throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
    termAtt.fillBytesRef();
    if (source.incrementToken())
      throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
    source.end();
    return BytesRef.deepCopyOf(bytes);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
  } finally {
    IOUtils.closeWhileHandlingException(source);
  }
}
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<>();
  TokenStream tokenStream = analyzer.tokenStream(field, text);
  try {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytesRef = termAttribute.getBytesRef();
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      // fillBytesRef() refreshes the shared BytesRef in place, so deep-copy before storing it
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }
    tokenStream.end();
  } finally {
    IOUtils.closeWhileHandlingException(tokenStream);
  }
  return bytesRefs;
}
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) throws Exception {
  TokenStream ts1 = a1.tokenStream("bogus", text);
  TokenStream ts2 = a2.tokenStream("bogus", text);
  ts1.reset();
  ts2.reset();
  TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
  TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  BytesRef bytes1 = termAtt1.getBytesRef();
  BytesRef bytes2 = termAtt2.getBytesRef();
  termAtt1.fillBytesRef();
  termAtt2.fillBytesRef();
  assertEquals(bytes1, bytes2);
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  ts1.close();
  ts2.close();
}
public void testIndexWriter_LUCENE4656() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));
  TokenStream ts = new EmptyTokenStream();
  assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.YES));
  doc.add(new TextField("description", ts));
  // this should not fail because we have no TermToBytesRefAttribute
  writer.addDocument(doc);
  assertEquals(1, writer.numDocs());
  writer.close();
  directory.close();
}
public void testLongStream() throws Exception {
  final NumericTokenStream stream = new NumericTokenStream().setLongValue(lvalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  assertNotNull(numericAtt);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    bytesAtt.fillBytesRef();
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
public void testIntStream() throws Exception {
  final NumericTokenStream stream = new NumericTokenStream().setIntValue(ivalue);
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  assertNotNull(bytesAtt);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  assertNotNull(typeAtt);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  assertNotNull(numericAtt);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    bytesAtt.fillBytesRef();
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
  if (part == null || analyzerIn == null) return null;
  TokenStream source = null;
  try {
    source = analyzerIn.tokenStream(field, part);
    source.reset();
    TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    if (!source.incrementToken())
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned no terms for multiTerm term: " + part);
    termAtt.fillBytesRef();
    if (source.incrementToken())
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "analyzer returned too many terms for multiTerm term: " + part);
    source.end();
    return BytesRef.deepCopyOf(bytes);
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "error analyzing range part: " + part, e);
  } finally {
    IOUtils.closeWhileHandlingException(source);
  }
}
/**
 * Creates a span query from the tokenstream. In the case of a single token,
 * a simple <code>SpanTermQuery</code> is returned. When multiple tokens, an
 * ordered <code>SpanNearQuery</code> with slop of 0 is returned.
 */
protected final SpanQuery createSpanQuery(TokenStream in, String field) throws IOException {
  TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class);
  if (termAtt == null) {
    return null;
  }
  List<SpanTermQuery> terms = new ArrayList<>();
  while (in.incrementToken()) {
    terms.add(new SpanTermQuery(new Term(field, termAtt.getBytesRef())));
  }
  if (terms.isEmpty()) {
    return null;
  } else if (terms.size() == 1) {
    return terms.get(0);
  } else {
    return new SpanNearQuery(terms.toArray(new SpanTermQuery[0]), 0, true);
  }
}
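// Hedged usage sketch for createSpanQuery above, e.g. from within a QueryBuilder
// subclass: the method consumes tokens without resetting, so the caller must
// reset() the stream first and end()/close() it afterwards. The analyzer, field
// name, and query text below are illustrative, not from the original source.
try (TokenStream in = analyzer.tokenStream("body", "quick brown fox")) {
  in.reset();
  SpanQuery q = createSpanQuery(in, "body"); // SpanTermQuery, or ordered SpanNearQuery with slop 0
  in.end();
}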
/**
 * Creates complex boolean query from the cached tokenstream contents
 */
protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException {
  BooleanQuery.Builder q = newBooleanQuery();
  List<Term> currentQuery = new ArrayList<>();
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  stream.reset();
  while (stream.incrementToken()) {
    if (posIncrAtt.getPositionIncrement() != 0) {
      add(q, currentQuery, operator);
      currentQuery.clear();
    }
    currentQuery.add(new Term(field, termAtt.getBytesRef()));
  }
  add(q, currentQuery, operator);
  return q.build();
}
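// Hedged sketch of driving analyzeMultiBoolean above from a QueryBuilder subclass,
// mirroring how QueryBuilder feeds it a cached, replayable stream. Tokens with a
// position increment of 0 (e.g. injected synonyms) land in the same clause group.
// The CachingTokenFilter wrapping, analyzer, field, text, and MUST operator are
// illustrative assumptions.
try (TokenStream source = analyzer.tokenStream("title", "fast car")) {
  CachingTokenFilter stream = new CachingTokenFilter(source);
  Query q = analyzeMultiBoolean("title", stream, BooleanClause.Occur.MUST);
}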
/**
 * Creates simple phrase query from the cached tokenstream contents
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.setSlop(slop);
  TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
  int position = -1;
  stream.reset();
  while (stream.incrementToken()) {
    if (enablePositionIncrements) {
      position += posIncrAtt.getPositionIncrement();
    } else {
      position += 1;
    }
    builder.add(new Term(field, termAtt.getBytesRef()), position);
  }
  return builder.build();
}
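// Hedged sketch of calling analyzePhrase above (again assuming a QueryBuilder
// subclass): slop 0 asks for an exact phrase, a positive slop tolerates that much
// positional movement. Analyzer, field name, and text are illustrative.
try (TokenStream source = analyzer.tokenStream("body", "new york")) {
  Query phrase = analyzePhrase("body", source, 0);
  source.end(); // analyzePhrase resets and consumes the stream but leaves end() to the caller
}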
protected Query doToQuery(QueryShardContext context) throws IOException {
  // Analyzer analyzer = context.getMapperService().searchAnalyzer();
  Analyzer analyzer = new WhitespaceAnalyzer();
  try (TokenStream source = analyzer.tokenStream(fieldName, value.toString())) {
    CachingTokenFilter stream = new CachingTokenFilter(new LowerCaseFilter(source));
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    if (termAtt == null) {
      return null;
    }
    List<CustomSpanTermQuery> clauses = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
      Term term = new Term(fieldName, termAtt.getBytesRef());
      clauses.add(new CustomSpanTermQuery(term));
    }
    return new PhraseCountQuery(clauses.toArray(new CustomSpanTermQuery[clauses.size()]), slop, inOrder, weightedCount);
  } catch (IOException e) {
    throw new RuntimeException("Error analyzing query text", e);
  }
}
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
  TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
  TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
  BytesRef bytesRef = termAttribute.getBytesRef();
  tokenStream.reset();
  while (tokenStream.incrementToken()) {
    termAttribute.fillBytesRef();
    bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
  }
  tokenStream.end();
  tokenStream.close();
  return bytesRefs;
}
private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) throws Exception {
  TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
  TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
  ts1.reset();
  ts2.reset();
  TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);
  TermToBytesRefAttribute termAtt2 = ts2.addAttribute(TermToBytesRefAttribute.class);
  assertTrue(ts1.incrementToken());
  assertTrue(ts2.incrementToken());
  BytesRef bytes1 = termAtt1.getBytesRef();
  BytesRef bytes2 = termAtt2.getBytesRef();
  termAtt1.fillBytesRef();
  termAtt2.fillBytesRef();
  assertEquals(bytes1, bytes2);
  assertFalse(ts1.incrementToken());
  assertFalse(ts2.incrementToken());
  ts1.close();
  ts2.close();
}
public void testIndexWriter_LUCENE4656() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
  TokenStream ts = new EmptyTokenStream();
  assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.YES));
  doc.add(new TextField("description", ts));
  // this should not fail because we have no TermToBytesRefAttribute
  writer.addDocument(doc);
  assertEquals(1, writer.numDocs());
  writer.close();
  directory.close();
}
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<BytesRef>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  try {
    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
    ts.close();
  } catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index", ioe);
  }
  return new TermsFilter(fieldName, terms);
}
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);
  try {
    List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
    TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(value));
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
      clausesList.add(stq);
    }
    ts.end();
    ts.close();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
  } catch (IOException ioe) {
    throw new ParserException("IOException parsing value: " + value);
  }
}
public void testLongStream() throws Exception {
  final NumericTokenStream stream = new NumericTokenStream().setLongValue(lvalue);
  // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(64, numericAtt.getValueSize());
  for (int shift = 0; shift < 64; shift += NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    final int hash = bytesAtt.fillBytesRef();
    assertEquals("Hash incorrect", bytes.hashCode(), hash);
    assertEquals("Term is incorrectly encoded", lvalue & ~((1L << shift) - 1L), NumericUtils.prefixCodedToLong(bytes));
    assertEquals("Term raw value is incorrectly encoded", lvalue & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
public void testIntStream() throws Exception {
  final NumericTokenStream stream = new NumericTokenStream().setIntValue(ivalue);
  // use getAttribute to test if attributes really exist; if not, an IAE will be thrown
  final TermToBytesRefAttribute bytesAtt = stream.getAttribute(TermToBytesRefAttribute.class);
  final TypeAttribute typeAtt = stream.getAttribute(TypeAttribute.class);
  final NumericTokenStream.NumericTermAttribute numericAtt = stream.getAttribute(NumericTokenStream.NumericTermAttribute.class);
  final BytesRef bytes = bytesAtt.getBytesRef();
  stream.reset();
  assertEquals(32, numericAtt.getValueSize());
  for (int shift = 0; shift < 32; shift += NumericUtils.PRECISION_STEP_DEFAULT) {
    assertTrue("New token is available", stream.incrementToken());
    assertEquals("Shift value wrong", shift, numericAtt.getShift());
    final int hash = bytesAtt.fillBytesRef();
    assertEquals("Hash incorrect", bytes.hashCode(), hash);
    assertEquals("Term is incorrectly encoded", ivalue & ~((1 << shift) - 1), NumericUtils.prefixCodedToInt(bytes));
    assertEquals("Term raw value is incorrectly encoded", ((long) ivalue) & ~((1L << shift) - 1L), numericAtt.getRawValue());
    assertEquals("Type incorrect", (shift == 0) ? NumericTokenStream.TOKEN_TYPE_FULL_PREC : NumericTokenStream.TOKEN_TYPE_LOWER_PREC, typeAtt.type());
  }
  assertFalse("More tokens available", stream.incrementToken());
  stream.end();
  stream.close();
}
@Override
public Filter getFilter(Element e) throws ParserException {
  List<BytesRef> terms = new ArrayList<BytesRef>();
  String text = DOMUtils.getNonBlankTextOrFail(e);
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, text);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      terms.add(BytesRef.deepCopyOf(bytes));
    }
    ts.end();
  } catch (IOException ioe) {
    throw new RuntimeException("Error constructing terms from index", ioe);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
  return new TermsFilter(fieldName, terms);
}
public Tagger(Terms terms, Bits liveDocs, TokenStream tokenStream,
              TagClusterReducer tagClusterReducer, boolean skipAltTokens,
              boolean ignoreStopWords) throws IOException {
  this.terms = terms;
  this.liveDocs = liveDocs;
  this.tokenStream = tokenStream;
  this.skipAltTokens = skipAltTokens;
  this.ignoreStopWords = ignoreStopWords;
  // termAtt = tokenStream.addAttribute(CharTermAttribute.class);
  byteRefAtt = tokenStream.addAttribute(TermToBytesRefAttribute.class);
  posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  lookupAtt = tokenStream.addAttribute(TaggingAttribute.class);
  tokenStream.reset();
  this.tagClusterReducer = tagClusterReducer;
}
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
  List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
  TokenStream tokenStream = analyzer.tokenStream(field, text);
  try {
    TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
    BytesRef bytesRef = termAttribute.getBytesRef();
    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      termAttribute.fillBytesRef();
      bytesRefs.add(BytesRef.deepCopyOf(bytesRef));
    }
    tokenStream.end();
  } finally {
    IOUtils.closeWhileHandlingException(tokenStream);
  }
  return bytesRefs;
}
/**
 * Sets attributeSource to a new instance.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
@Override
public void reflectWith(AttributeReflector reflector) {
  fillBytesRef();
  reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes.toBytesRef());
  reflector.reflect(NumericTermAttribute.class, "shift", shift);
  reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
  reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
}
@Override
public SpanQuery getSpanQuery(Element e) throws ParserException {
  String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
  String value = DOMUtils.getNonBlankTextOrFail(e);
  List<SpanQuery> clausesList = new ArrayList<>();
  TokenStream ts = null;
  try {
    ts = analyzer.tokenStream(fieldName, value);
    TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
    BytesRef bytes = termAtt.getBytesRef();
    ts.reset();
    while (ts.incrementToken()) {
      termAtt.fillBytesRef();
      SpanTermQuery stq = new SpanTermQuery(new Term(fieldName, BytesRef.deepCopyOf(bytes)));
      clausesList.add(stq);
    }
    ts.end();
    SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
    soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
    return soq;
  } catch (IOException ioe) {
    throw new ParserException("IOException parsing value: " + value);
  } finally {
    IOUtils.closeWhileHandlingException(ts);
  }
}