Java Code Examples for org.apache.lucene.index.Fields

Project: elasticsearch_my    File: PagedBytesIndexFieldData.java
/**
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        Terms terms = reader.terms(getFieldName());

        Fields fields = reader.fields();
        final Terms fieldTerms = fields.terms(getFieldName());

        if (fieldTerms instanceof FieldReader) {
            final Stats stats = ((FieldReader) fieldTerms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                        totalTermBytes, terms.size(), terms.getSumDocFreq());
            }
            long totalBytes = totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
            return totalBytes;
        }
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
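
The branch above only fires when the terms dictionary is BlockTree's FieldReader, but the raw statistics are available on any Terms instance. A minimal standalone sketch, assuming an open LeafReader named leafReader and a hypothetical field "body" (totalTermBytes itself is only exposed by FieldReader.getStats(), so it is stubbed to 0 here):

Terms t = leafReader.terms("body");
if (t != null) {
    long uniqueTerms = t.size();          // number of unique terms, or -1 if unknown
    long sumDocFreq = t.getSumDocFreq();  // total number of postings, or -1 if unknown
    if (uniqueTerms != -1 && sumDocFreq != -1) {
        long totalTermBytes = 0L;         // stub; the real value comes from FieldReader stats
        long estimate = totalTermBytes + (2 * uniqueTerms) + (4 * sumDocFreq);
    }
}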
Project: elasticsearch_my    File: MoreLikeThisQueryBuilderTests.java
@Override
protected MultiTermVectorsResponse executeMultiTermVectors(MultiTermVectorsRequest mtvRequest) {
    try {
        MultiTermVectorsItemResponse[] responses = new MultiTermVectorsItemResponse[mtvRequest.size()];
        int i = 0;
        for (TermVectorsRequest request : mtvRequest) {
            TermVectorsResponse response = new TermVectorsResponse(request.index(), request.type(), request.id());
            response.setExists(true);
            Fields generatedFields;
            if (request.doc() != null) {
                generatedFields = generateFields(randomFields, request.doc().utf8ToString());
            } else {
                generatedFields = generateFields(request.selectedFields().toArray(new String[request.selectedFields().size()]), request.id());
            }
            EnumSet<TermVectorsRequest.Flag> flags = EnumSet.of(TermVectorsRequest.Flag.Positions, TermVectorsRequest.Flag.Offsets);
            response.setFields(generatedFields, request.selectedFields(), flags, generatedFields);
            responses[i++] = new MultiTermVectorsItemResponse(response, null);
        }
        return new MultiTermVectorsResponse(responses);
    } catch (IOException ex) {
        throw new ElasticsearchException("boom", ex);
    }
}
Project: elasticsearch_my    File: GetTermVectorsIT.java
public void testDuelESLucene() throws Exception {
    TestFieldSetting[] testFieldSettings = getFieldSettings();
    createIndexBasedOnFieldSettings("test", "alias", testFieldSettings);
    // we generate as many docs as we have shards
    TestDoc[] testDocs = generateTestDocs("test", testFieldSettings);

    DirectoryReader directoryReader = indexDocsWithLucene(testDocs);
    TestConfig[] testConfigs = generateTestConfigs(20, testDocs, testFieldSettings);

    for (TestConfig test : testConfigs) {
        TermVectorsRequestBuilder request = getRequestForConfig(test);
        if (test.expectedException != null) {
            assertThrows(request, test.expectedException);
            continue;
        }

        TermVectorsResponse response = request.get();
        Fields luceneTermVectors = getTermVectorsFromLucene(directoryReader, test.doc);
        validateResponse(response, luceneTermVectors, test);
    }
}
Project: lams    File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: lams    File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: lams    File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)}, 
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
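
For orientation, addAllDocVectors ultimately walks each document's Fields with three nested loops: fields, then terms, then positions. A stripped-down sketch of that traversal against the same Lucene 4.x API, assuming vectors is non-null:

for (String fieldName : vectors) {                             // field loop
  Terms terms = vectors.terms(fieldName);
  TermsEnum termsEnum = terms.iterator(null);
  BytesRef term;
  while ((term = termsEnum.next()) != null) {                  // term loop
    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
    if (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < postings.freq(); i++) {              // position loop
        int position = postings.nextPosition();
      }
    }
  }
}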
Project: Elasticsearch    File: DfsOnlyRequest.java
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
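
Reduced to a single field, the same term-vector-to-query expansion is only a few lines. A hedged sketch against the same API generation, where "title" is a hypothetical field name:

BoolQueryBuilder bool = boolQuery();
Terms terms = termVectorsFields.terms("title");
if (terms != null) {
    TermsEnum it = terms.iterator();
    while (it.next() != null) {
        bool.should(QueryBuilders.termQuery("title", it.term().utf8ToString()));
    }
}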
Project: search    File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
  return new FilterFields(super.fields()) {
    @Override
    public Terms terms(String field) throws IOException {
      return super.terms(DelegatingAtomicReader.FIELD_NAME);
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
    }

    @Override
    public int size() {
      return 1;
    }
  };
}
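
The same FilterFields hook can run the other way, hiding one field rather than collapsing everything onto one. A sketch, assuming an existing Fields instance named fields; "secret" is a hypothetical field name, and a complete version would also filter iterator() and size() to stay consistent:

Fields visible = new FilterAtomicReader.FilterFields(fields) {
  @Override
  public Terms terms(String field) throws IOException {
    return "secret".equals(field) ? null : super.terms(field);
  }
};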
Project: search    File: TokenSources.java
/**
 * A convenience method that first tries to get a TermPositionVector for the
 * specified docId and then falls back to using the passed-in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 * 
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */

public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Document doc, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(doc, field, analyzer);
  }
  return ts;
}
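
A hedged usage sketch for the method above, feeding the resulting stream to a highlighter (exception handling omitted); reader, docId, doc, analyzer, and highlighter are assumed to exist, and "content" is a hypothetical field name:

TokenStream ts = TokenSources.getAnyTokenStream(reader, docId, "content", doc, analyzer);
String fragment = highlighter.getBestFragment(ts, doc.get("content"));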
Project: search    File: TokenSources.java
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of discovering that there are no term vectors in the index
 * is minimal (1000 invocations still register 0 ms), so this "lazy"
 * (flexible?) approach to coding is probably acceptable.
 * 
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(reader, docId, field, analyzer);
  }
  return ts;
}
Project: search    File: TokenSources.java
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors.  If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {

  Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }

  Terms vector = vectors.terms(field);
  if (vector == null) {
    return null;
  }

  if (!vector.hasPositions() || !vector.hasOffsets()) {
    return null;
  }

  return getTokenStream(vector);
}
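
Consuming the returned stream follows the standard TokenStream protocol. A minimal sketch, assuming the hypothetical field "content" was indexed with term vectors carrying positions and offsets:

TokenStream ts = TokenSources.getTokenStreamWithOffsets(reader, docId, "content");
if (ts != null) {
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  OffsetAttribute offsets = ts.addAttribute(OffsetAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    System.out.println(term + " [" + offsets.startOffset() + "-" + offsets.endOffset() + "]");
  }
  ts.end();
  ts.close();
}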
Project: search    File: SumTotalTermFreqValueSource.java
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
  long sumTotalTermFreq = 0;
  for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
    Fields fields = readerContext.reader().fields();
    if (fields == null) continue;
    Terms terms = fields.terms(indexedField);
    if (terms == null) continue;
    long v = terms.getSumTotalTermFreq();
    if (v == -1) {
      sumTotalTermFreq = -1;
      break;
    } else {
      sumTotalTermFreq += v;
    }
  }
  final long ttf = sumTotalTermFreq;
  context.put(this, new LongDocValues(this) {
    @Override
    public long longVal(int doc) {
      return ttf;
    }
  });
}
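
When only one field's statistic is needed at the top level, the per-leaf loop collapses to a single MultiFields lookup. A sketch for the same Lucene 4.x API, assuming an open IndexReader named reader; "body" is a hypothetical field name, and -1 still signals that the codec does not expose the statistic:

Terms terms = MultiFields.getTerms(reader, "body");
long sumTotalTermFreq = terms == null ? 0 : terms.getSumTotalTermFreq();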
Project: search    File: LuceneTestCase.java
/** 
 * Fields api equivalency 
 */
public void assertFieldsEquals(String info, IndexReader leftReader, Fields leftFields, Fields rightFields, boolean deep) throws IOException {
  // Fields could be null if there are no postings,
  // but then it must be null for both
  if (leftFields == null || rightFields == null) {
    assertNull(info, leftFields);
    assertNull(info, rightFields);
    return;
  }
  assertFieldStatisticsEquals(info, leftFields, rightFields);

  Iterator<String> leftEnum = leftFields.iterator();
  Iterator<String> rightEnum = rightFields.iterator();

  while (leftEnum.hasNext()) {
    String field = leftEnum.next();
    assertEquals(info, field, rightEnum.next());
    assertTermsEquals(info, leftReader, leftFields.terms(field), rightFields.terms(field), deep);
  }
  assertFalse(rightEnum.hasNext());
}
Project: search    File: LuceneTestCase.java
/** 
 * checks that norms are the same across all fields 
 */
public void assertNormsEquals(String info, IndexReader leftReader, IndexReader rightReader) throws IOException {
  Fields leftFields = MultiFields.getFields(leftReader);
  Fields rightFields = MultiFields.getFields(rightReader);
  // Fields could be null if there are no postings,
  // but then it must be null for both
  if (leftFields == null || rightFields == null) {
    assertNull(info, leftFields);
    assertNull(info, rightFields);
    return;
  }

  for (String field : leftFields) {
    NumericDocValues leftNorms = MultiDocValues.getNormValues(leftReader, field);
    NumericDocValues rightNorms = MultiDocValues.getNormValues(rightReader, field);
    if (leftNorms != null && rightNorms != null) {
      assertDocValuesEquals(info, leftReader.maxDoc(), leftNorms, rightNorms);
    } else {
      assertNull(info, leftNorms);
      assertNull(info, rightNorms);
    }
  }
}
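
The MultiDocValues norms lookup used by this check works the same way outside tests. A sketch, assuming reader is an open IndexReader and the hypothetical field "body" was indexed with norms (Lucene 4.x NumericDocValues is random access):

NumericDocValues norms = MultiDocValues.getNormValues(reader, "body");
if (norms != null) {
  long norm = norms.get(docId);  // encoded norm for one document
}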
Project: search    File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: search    File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: search    File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)}, 
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
Project: search    File: TestMultiThreadTermVectors.java
private void testTermVectors() throws Exception {
  // check:
  int numDocs = reader.numDocs();
  long start = 0L;
  for (int docId = 0; docId < numDocs; docId++) {
    start = System.currentTimeMillis();
    Fields vectors = reader.getTermVectors(docId);
    timeElapsed += System.currentTimeMillis()-start;

    // verify vectors result
    verifyVectors(vectors, docId);

    start = System.currentTimeMillis();
    Terms vector = reader.getTermVectors(docId).terms("field");
    timeElapsed += System.currentTimeMillis()-start;

    verifyVector(vector.iterator(null), docId);
  }
}
Project: search    File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
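
A hedged usage sketch of the lookup above, resolving a unique key to an internal document number and loading the stored document; searcher is an open SolrIndexSearcher and "id" a hypothetical unique-key field:

int docNum = searcher.getFirstMatch(new Term("id", "42"));
if (docNum != -1) {
  Document d = searcher.doc(docNum);
}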
Project: search    File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: eswc-2015-semantic-typing    File: TfIdfSearcher.java
/**
 * @param reader the index reader to collect terms from
 * @return a map from each term to its inverse document frequency
 * @throws IOException if there is a low-level I/O error
 */
public Map<String, Float> getIdfs(IndexReader reader) throws IOException
{
     Fields fields = MultiFields.getFields(reader); //get the fields of the index 

     for (String field: fields) 
     {   
         TermsEnum termEnum = MultiFields.getTerms(reader, field).iterator(null);

         BytesRef bytesRef;
         while ((bytesRef = termEnum.next()) != null) 
         {
             if (termEnum.seekExact(bytesRef)) 
             {
                 String term = bytesRef.utf8ToString(); 
                 float idf = tfidfSIM.idf( termEnum.docFreq(), reader.numDocs() );
                 inverseDocFreq.put(term, idf);    
                 System.out.println(term +" idf= "+ idf);
             }
         }
     }

     return inverseDocFreq;
}
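
A usage sketch for the method above; the index path and the TfIdfSearcher construction are assumptions, and tfidfSIM is expected to be a TFIDFSimilarity such as DefaultSimilarity:

IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")));
Map<String, Float> idfs = new TfIdfSearcher().getIdfs(reader);
reader.close();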
Project: NYBC    File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
  return new FilterFields(super.fields()) {
    @Override
    public Terms terms(String field) throws IOException {
      return super.terms(DelegatingAtomicReader.FIELD_NAME);
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
    }

    @Override
    public int size() {
      return 1;
    }
  };
}
Project: NYBC    File: TokenSources.java
/**
 * A convenience method that first tries to get a TermPositionVector for the
 * specified docId and then falls back to using the passed-in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 * 
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */

public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Document doc, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(doc, field, analyzer);
  }
  return ts;
}
Project: NYBC    File: TokenSources.java
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of discovering that there are no term vectors in the index
 * is minimal (1000 invocations still register 0 ms), so this "lazy"
 * (flexible?) approach to coding is probably acceptable.
 * 
 * @return null if field not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(reader, docId, field, analyzer);
  }
  return ts;
}
Project: NYBC    File: TokenSources.java
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors.  If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {

  Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }

  Terms vector = vectors.terms(field);
  if (vector == null) {
    return null;
  }

  if (!vector.hasPositions() || !vector.hasOffsets()) {
    return null;
  }

  return getTokenStream(vector);
}
Project: NYBC    File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: NYBC    File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)}, 
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
Project: NYBC    File: TestMultiThreadTermVectors.java
private void testTermVectors() throws Exception {
  // check:
  int numDocs = reader.numDocs();
  long start = 0L;
  for (int docId = 0; docId < numDocs; docId++) {
    start = System.currentTimeMillis();
    Fields vectors = reader.getTermVectors(docId);
    timeElapsed += System.currentTimeMillis()-start;

    // verify vectors result
    verifyVectors(vectors, docId);

    start = System.currentTimeMillis();
    Terms vector = reader.getTermVectors(docId).terms("field");
    timeElapsed += System.currentTimeMillis()-start;

    verifyVector(vector.iterator(null), docId);
  }
}
Project: NYBC    File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: NYBC    File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: incubator-blur    File: IndexImporter.java
private void runOldMergeSortRowIdCheckAndDelete(boolean emitDeletes, IndexReader currentIndexReader,
    BlurPartitioner blurPartitioner, Text key, int numberOfShards, int shardId, Action action,
    AtomicReader atomicReader) throws IOException {
  MergeSortRowIdLookup lookup = new MergeSortRowIdLookup(currentIndexReader);
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.ROW_ID);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef ref = null;
    while ((ref = termsEnum.next()) != null) {
      key.set(ref.bytes, ref.offset, ref.length);
      int partition = blurPartitioner.getPartition(key, null, numberOfShards);
      if (shardId != partition) {
        throw new IOException("Index is corrupted, RowIds are found in wrong shard, partition [" + partition
            + "] does not shard [" + shardId + "], this can happen when rows are not hashed correctly.");
      }
      if (emitDeletes) {
        lookup.lookup(ref, action);
      }
    }
  }
}
Project: incubator-blur    File: MutatableAction.java
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  BytesRef rowIdRef = new BytesRef(rowId);
  List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      continue;
    }
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(rowIdRef, true)) {
      continue;
    }
    // need atomic read as well...
    possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
  }
  if (possibleRowIds.isEmpty()) {
    return null;
  }
  return new IterableRow(rowId, getRecords(possibleRowIds));
}
Project: search-core    File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>. Returns -1 if no
 * document was found. This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * 
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if(fields == null)
        return -1;
    Terms terms = fields.terms(t.field());
    if(terms == null)
        return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if(!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if(docs == null)
        return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: search-core    File: TestRTGBase.java
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Project: read-open-source-code    File: Lucene40TermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: read-open-source-code    File: Lucene3xTermVectorsReader.java
@Override
public Fields get(int docID) throws IOException {
  if (tvx != null) {
    Fields fields = new TVFields(docID);
    if (fields.size() == 0) {
      // TODO: we can improve writer here, eg write 0 into
      // tvx file, so we know on first read from tvx that
      // this doc has no TVs
      return null;
    } else {
      return fields;
    }
  } else {
    return null;
  }
}
Project: read-open-source-code    File: TermVectorsWriter.java
/** Merges in the term vectors from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)}, 
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
Project: read-open-source-code    File: SumTotalTermFreqValueSource.java
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
  long sumTotalTermFreq = 0;
  for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
    Fields fields = readerContext.reader().fields();
    if (fields == null) continue;
    Terms terms = fields.terms(indexedField);
    if (terms == null) continue;
    long v = terms.getSumTotalTermFreq();
    if (v == -1) {
      sumTotalTermFreq = -1;
      break;
    } else {
      sumTotalTermFreq += v;
    }
  }
  final long ttf = sumTotalTermFreq;
  context.put(this, new LongDocValues(this) {
    @Override
    public long longVal(int doc) {
      return ttf;
    }
  });
}
Project: read-open-source-code    File: WeightedSpanTermExtractor.java
@Override
public Fields fields() throws IOException {
  return new FilterFields(super.fields()) {
    @Override
    public Terms terms(String field) throws IOException {
      return super.terms(DelegatingAtomicReader.FIELD_NAME);
    }

    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator();
    }

    @Override
    public int size() {
      return 1;
    }
  };
}
Project: read-open-source-code    File: SolrIndexSearcher.java
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}