Java class org.apache.lucene.search.TermStatistics usage examples (source code)

Project: elasticsearch_my    File: AggregatedDfs.java
@Override
public void writeTo(final StreamOutput out) throws IOException {
    out.writeVInt(termStatistics.size());

    for (ObjectObjectCursor<Term, TermStatistics> c : termStatistics()) {
        Term term = c.key;
        out.writeString(term.field());
        out.writeBytesRef(term.bytes());
        TermStatistics stats = c.value;
        out.writeBytesRef(stats.term());
        out.writeVLong(stats.docFreq());
        out.writeVLong(DfsSearchResult.addOne(stats.totalTermFreq()));
    }

    DfsSearchResult.writeFieldStats(out, fieldStatistics);
    out.writeVLong(maxDoc);
}
Project: elasticsearch_my    File: DfsSearchResult.java
public static TermStatistics[] readTermStats(StreamInput in, Term[] terms) throws IOException {
    int termsStatsSize = in.readVInt();
    final TermStatistics[] termStatistics;
    if (termsStatsSize == 0) {
        termStatistics = EMPTY_TERM_STATS;
    } else {
        termStatistics = new TermStatistics[termsStatsSize];
        assert terms.length == termsStatsSize;
        for (int i = 0; i < termStatistics.length; i++) {
            BytesRef term = terms[i].bytes();
            final long docFreq = in.readVLong();
            assert docFreq >= 0;
            final long totalTermFreq = subOne(in.readVLong());
            termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq);
        }
    }
    return termStatistics;
}
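
In the writeTo/readTermStats pair above, totalTermFreq is reported as -1 when the codec does not track it, yet a VLong is expected to be non-negative. DfsSearchResult therefore shifts the value by one on the wire: addOne before writing, subOne after reading. A minimal sketch of such helpers, assuming this +1/-1 encoding (illustrative, not the project's verbatim source):

// Shift by one so that the sentinel value -1 can travel as a non-negative VLong.
static long addOne(long value) {
    assert value + 1 >= 0;
    return value + 1;   // -1 -> 0, 0 -> 1, ...
}

// Undo the shift on the reading side.
static long subOne(long value) {
    assert value >= 0;
    return value - 1;
}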
Project: lucene-custom-query    File: SeqSpanWeight.java
protected SeqSpanWeight(SeqSpanQuery query, IndexSearcher searcher) throws IOException {
  super(query);
  this.selfQuery = query;
  this.similarity = searcher.getSimilarity(needsScores);
  this.positions = selfQuery.getPositions();
  this.terms = selfQuery.getTerms();
  this.field = terms[0].field();
  if (positions.length < 2) {
    throw new IllegalStateException("PhraseWeight does not support less than 2 terms, call rewrite first");
  } else if (positions[0] != 0) {
    throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first");
  }
  final IndexReaderContext context = searcher.getTopReaderContext();
  states = new TermContext[terms.length];
  TermStatistics termStats[] = new TermStatistics[terms.length];
  for (int i = 0; i < terms.length; i++) {
    final Term term = terms[i];
    states[i] = TermContext.build(context, term);
    termStats[i] = searcher.termStatistics(term, states[i]);
  }
  stats = similarity.computeWeight(searcher.collectionStatistics(terms[0].field()), termStats);
}
Project: Elasticsearch    File: AggregatedDfs.java
@Override
public void writeTo(final StreamOutput out) throws IOException {
    out.writeVInt(termStatistics.size());

    for (ObjectObjectCursor<Term, TermStatistics> c : termStatistics()) {
        Term term = c.key;
        out.writeString(term.field());
        out.writeBytesRef(term.bytes());
        TermStatistics stats = c.value;
        out.writeBytesRef(stats.term());
        out.writeVLong(stats.docFreq());
        out.writeVLong(DfsSearchResult.addOne(stats.totalTermFreq()));
    }

    DfsSearchResult.writeFieldStats(out, fieldStatistics);
    out.writeVLong(maxDoc);
}
Project: Elasticsearch    File: DfsSearchResult.java
public static TermStatistics[] readTermStats(StreamInput in, Term[] terms) throws IOException {
    int termsStatsSize = in.readVInt();
    final TermStatistics[] termStatistics;
    if (termsStatsSize == 0) {
        termStatistics = EMPTY_TERM_STATS;
    } else {
        termStatistics = new TermStatistics[termsStatsSize];
        assert terms.length == termsStatsSize;
        for (int i = 0; i < termStatistics.length; i++) {
            BytesRef term = terms[i].bytes();
            final long docFreq = in.readVLong();
            assert docFreq >= 0;
            final long totalTermFreq = subOne(in.readVLong());
            termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq);
        }
    }
    return termStatistics;
}
Project: lucene4ir    File: SMARTBNNBNNSimilarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
    float N, n, idf, adl;
    idf = 1.0f;
    N   = collectionStats.maxDoc();
    adl = collectionStats.sumTotalTermFreq() / N;

    if (termStats.length == 1) {
        n = termStats[0].docFreq();
        idf = log(N / n);
    } else {
        for (final TermStatistics stat : termStats) {
            n = stat.docFreq();
            idf += log(N / n);
        }
    }

    return new TFIDFWeight(collectionStats.field(), idf, adl);
}
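
Assuming log here denotes the natural logarithm, a quick worked example of the idf factor above with made-up numbers: a field with N = 1000 documents and a term occurring in n = 10 of them gives idf = log(1000 / 10) ≈ 4.6, so rare terms contribute far more weight than common ones.

// Illustrative only (hypothetical N and n, natural log assumed):
float N = 1000f, n = 10f;
float idf = (float) Math.log(N / n);   // log(100) ≈ 4.605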
Project: DoSeR-Disambiguation    File: FuzzyLabelSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for each term in the
 * phrase.
 * 
 * @param collectionStats
 *            collection-level statistics
 * @param termStats
 *            term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf score factor for the
 *         phrase and an explanation for each term.
 */
public Explanation idfExplain(final CollectionStatistics collectionStats,
        final TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    final Explanation exp = new Explanation();
    exp.setDescription("idf(), sum of:");
    for (final TermStatistics stat : termStats) {
        final long docFreq = stat.docFreq();
        final float termIdf = idf(docFreq, max);
        exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
                + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    exp.setValue(idf);
    return exp;
}
Project: DoSeR    File: FuzzyLabelSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for each term in the
 * phrase.
 * 
 * @param collectionStats
 *            collection-level statistics
 * @param termStats
 *            term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf score factor for the
 *         phrase and an explanation for each term.
 */
public Explanation idfExplain(final CollectionStatistics collectionStats,
        final TermStatistics termStats[]) {
    final long max = collectionStats.maxDoc();
    float idf = 0.0f;
    final Explanation exp = new Explanation();
    exp.setDescription("idf(), sum of:");
    for (final TermStatistics stat : termStats) {
        final long docFreq = stat.docFreq();
        final float termIdf = idf(docFreq, max);
        exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + docFreq
                + ", maxDocs=" + max + ")"));
        idf += termIdf;
    }
    exp.setValue(idf);
    return exp;
}
Project: elasticsearch_my    File: AggregatedDfs.java
@Override
public void readFrom(StreamInput in) throws IOException {
    int size = in.readVInt();
    termStatistics = HppcMaps.newMap(size);
    for (int i = 0; i < size; i++) {
        Term term = new Term(in.readString(), in.readBytesRef());
        TermStatistics stats = new TermStatistics(in.readBytesRef(),
                in.readVLong(),
                DfsSearchResult.subOne(in.readVLong()));
        termStatistics.put(term, stats);
    }
    fieldStatistics = DfsSearchResult.readFieldStats(in);
    maxDoc = in.readVLong();
}
Project: elasticsearch_my    File: TermVectorsWriter.java
private void writeTermStatistics(TermStatistics termStatistics) throws IOException {
    int docFreq = (int) termStatistics.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);
    long ttf = termStatistics.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
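
Both statistics can be -1 when they are not available, which is why the writer goes through the "potentially negative" variants instead of plain writeVInt/writeVLong. A hedged sketch of what such a helper might look like, reusing the same +1 shift seen in DfsSearchResult; the field name output is hypothetical and the real implementation may differ:

// Hypothetical helper: shift the value by one so -1 ("not available") stays non-negative.
private void writePotentiallyNegativeVInt(int value) throws IOException {
    output.writeVInt(Math.max(0, value + 1));   // 'output' is an assumed StreamOutput field
}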
Project: elasticsearch_my    File: DfsQueryPhaseTests.java
public void testFailPhaseOnException() throws IOException {
    AtomicArray<DfsSearchResult> results = new AtomicArray<>(2);
    AtomicReference<AtomicArray<QuerySearchResultProvider>> responseRef = new AtomicReference<>();
    results.set(0, new DfsSearchResult(1, new SearchShardTarget("node1", new Index("test", "na"), 0)));
    results.set(1, new DfsSearchResult(2, new SearchShardTarget("node2", new Index("test", "na"), 0)));
    results.get(0).termsStatistics(new Term[0], new TermStatistics[0]);
    results.get(1).termsStatistics(new Term[0], new TermStatistics[0]);

    SearchPhaseController controller = new SearchPhaseController(Settings.EMPTY, BigArrays.NON_RECYCLING_INSTANCE, null);
    SearchTransportService searchTransportService = new SearchTransportService(
        Settings.builder().put("search.remote.connect", false).build(), null,  null) {

        @Override
        public void sendExecuteQuery(Transport.Connection connection, QuerySearchRequest request, SearchTask task,
                                     ActionListener<QuerySearchResult> listener) {
            if (request.id() == 1) {
                QuerySearchResult queryResult = new QuerySearchResult(123, new SearchShardTarget("node1", new Index("test", "na"), 0));
                queryResult.topDocs(new TopDocs(1, new ScoreDoc[] {new ScoreDoc(42, 1.0F)}, 2.0F), new DocValueFormat[0]);
                queryResult.size(2); // the size of the result set
                listener.onResponse(queryResult);
            } else if (request.id() == 2) {
               throw new UncheckedIOException(new MockDirectoryWrapper.FakeIOException());
            } else {
                fail("no such request ID: " + request.id());
            }
        }
    };
    MockSearchPhaseContext mockSearchPhaseContext = new MockSearchPhaseContext(2);
    mockSearchPhaseContext.searchTransport = searchTransportService;
    DfsQueryPhase phase = new DfsQueryPhase(results, controller,
        (response) -> new SearchPhase("test") {
            @Override
            public void run() throws IOException {
                responseRef.set(response.results);
            }
        }, mockSearchPhaseContext);
    assertEquals("dfs_query", phase.getName());
    expectThrows(UncheckedIOException.class, () -> phase.run());
    assertTrue(mockSearchPhaseContext.releasedSearchContexts.isEmpty()); // phase execution will clean up on the contexts
}
Project: lams    File: SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  BasicStats stats[] = new BasicStats[termStats.length];
  for (int i = 0; i < termStats.length; i++) {
    stats[i] = newStats(collectionStats.field(), queryBoost);
    fillBasicStats(stats[i], collectionStats, termStats[i]);
  }
  return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
Project: lams    File: SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
 *  Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // #positions(field) must be >= #positions(term)
  assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
  long numberOfDocuments = collectionStats.maxDoc();

  long docFreq = termStats.docFreq();
  long totalTermFreq = termStats.totalTermFreq();

  // codec does not supply totalTermFreq: substitute docFreq
  if (totalTermFreq == -1) {
    totalTermFreq = docFreq;
  }

  final long numberOfFieldTokens;
  final float avgFieldLength;

  long sumTotalTermFreq = collectionStats.sumTotalTermFreq();

  if (sumTotalTermFreq <= 0) {
    // field does not exist;
    // We have to provide something if codec doesnt supply these measures,
    // or if someone omitted frequencies for the field... negative values cause
    // NaN/Inf for some scorers.
    numberOfFieldTokens = docFreq;
    avgFieldLength = 1;
  } else {
    numberOfFieldTokens = sumTotalTermFreq;
    avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
  }

  // TODO: add sumDocFreq for field (numberOfFieldPostings)
  stats.setNumberOfDocuments(numberOfDocuments);
  stats.setNumberOfFieldTokens(numberOfFieldTokens);
  stats.setAvgFieldLength(avgFieldLength);
  stats.setDocFreq(docFreq);
  stats.setTotalTermFreq(totalTermFreq);
}
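
The only derived quantity here is the average field length, sumTotalTermFreq divided by maxDoc. A throwaway example with made-up numbers:

// Illustrative only: 50,000 tokens spread over 2,000 documents.
long sumTotalTermFreq = 50_000L;
long numberOfDocuments = 2_000L;
float avgFieldLength = (float) sumTotalTermFreq / numberOfDocuments;   // 25.0f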
Project: lams    File: BM25Similarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: lams    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
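
The 256-entry cache holds the frequency-independent length normalization k1 * ((1 - b) + b * dl / avgdl) for every possible encoded norm byte, so scoring a document only needs an array lookup plus the term frequency. A sketch of how such a cache is typically consumed at score time (illustrative parameter names, not the project's actual scorer):

// Hedged sketch: combine the summed idf, the term frequency and the cached
// length normalization into the familiar BM25 shape.
static float bm25Score(float idf, float k1, float freq, float[] cache, byte normByte) {
    float weightValue = idf * (k1 + 1);     // frequency-independent factor
    float norm = cache[normByte & 0xFF];    // k1 * ((1 - b) + b * dl / avgdl)
    return weightValue * freq / (freq + norm);
}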
Project: lams    File: PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  PerFieldSimWeight weight = new PerFieldSimWeight();
  weight.delegate = get(collectionStats.field());
  weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
  return weight;
}
Project: lams    File: MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimWeight subStats[] = new SimWeight[sims.length];
  for (int i = 0; i < subStats.length; i++) {
    subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
  }
  return new MultiStats(subStats);
}
Project: lams    File: LMSimilarity.java
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  super.fillBasicStats(stats, collectionStats, termStats);
  LMStats lmStats = (LMStats) stats;
  lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
Project: lams    File: TFIDFSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: lams    File: TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  final Explanation idf = termStats.length == 1
  ? idfExplain(collectionStats, termStats[0])
  : idfExplain(collectionStats, termStats);
  return new IDFStats(collectionStats.field(), idf, queryBoost);
}
Project: Elasticsearch    File: AggregatedDfs.java
@Override
public void readFrom(StreamInput in) throws IOException {
    int size = in.readVInt();
    termStatistics = HppcMaps.newMap(size);
    for (int i = 0; i < size; i++) {
        Term term = new Term(in.readString(), in.readBytesRef());
        TermStatistics stats = new TermStatistics(in.readBytesRef(), 
                in.readVLong(), 
                DfsSearchResult.subOne(in.readVLong()));
        termStatistics.put(term, stats);
    }
    fieldStatistics = DfsSearchResult.readFieldStats(in);
    maxDoc = in.readVLong();
}
Project: Elasticsearch    File: TermVectorsWriter.java
private void writeTermStatistics(TermStatistics termStatistics) throws IOException {
    int docFreq = (int) termStatistics.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);
    long ttf = termStatistics.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
Project: ir-generalized-translation-models    File: BM25SimilarityLossless.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  return new BM25StatsFixed(collectionStats.field(), k1, b, idf, avgdl);
}
Project: linden    File: LindenSimilarity.java
@Override
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
  final long df = termStats.docFreq();
  final long max = collectionStats.maxDoc();
  final float idf = idfManager.getIDF(termStats.term().utf8ToString());
  return new Explanation(idf, "idf(docFreq=" + df + ", maxDocs=" + max + ")");
}
Project: lucene4ir    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, avgdl, cache);
}
Project: lucene4ir    File: OKAPIBM25Similarity.java
@Override
public final SimWeight computeWeight(CollectionStatistics collectionStats,
                                     TermStatistics... termStats) {
    long  N, n;
    float idf_, avdl;

    idf_ = 1.0f;

    N = collectionStats.docCount();
    if (N == -1) {
        N = collectionStats.maxDoc();
    }

    avdl = collectionStats.sumTotalTermFreq() / N;

    if (termStats.length == 1) {
        n    = termStats[0].docFreq();
        idf_ = idf(n, N);
    } else { /* computation for a phrase */
        for (final TermStatistics stat : termStats) {
            n     = stat.docFreq();
            idf_ += idf(n, N);
        }
    }

    return new TFIDFWeight(collectionStats.field(), idf_, avdl);
}
Project: DoSeR-Disambiguation    File: FuzzyLabelSimilarity.java
@Override
public final SimWeight computeWeight(final float queryBoost,
        final CollectionStatistics collectionStats,
        final TermStatistics... termStats) {
    final Explanation idf = termStats.length == 1 ? this.idfExplain(
            collectionStats, termStats[0]) : this.idfExplain(
            collectionStats, termStats);
    return new IDFStats(collectionStats.field(), idf, queryBoost);
}
Project: elasticsearch-simple-similarity    File: SimpleSimilarity.java
public final SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    if (termStats.length == 1) {
        return new SimpleScore(boost, collectionStats, termStats[0]);
    } else {
        return new SimpleScore(boost, collectionStats, termStats);
    }
}
Project: elasticsearch-simple-similarity    File: SimpleSimilarity.java
SimpleScore(float boost, CollectionStatistics collectionStats, TermStatistics termStats[]) {
    float total = 0.0f;
    List<Explanation> scores = new ArrayList<>();
    for (final TermStatistics stat : termStats) {
        String description = String.format("simple score for (%s:%s)", collectionStats.field(), stat.term().utf8ToString());
        scores.add(Explanation.match(1.0f, description));
        total += 1.0f;
    }
    this.score = Explanation.match(total, "total score, sum of:", scores);
    this.boost = Explanation.match(boost, "boost");
}
Project: LuceneDB    File: DummySimilarity.java
@Override
public SimWeight computeWeight(float queryBoost,
        CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new SimWeight() {

        @Override
        public void normalize(float queryNorm, float topLevelBoost) {
        }

        @Override
        public float getValueForNormalization() {
            return 0;
        }
    };
}
Project: search    File: SimilarityBase.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  BasicStats stats[] = new BasicStats[termStats.length];
  for (int i = 0; i < termStats.length; i++) {
    stats[i] = newStats(collectionStats.field(), queryBoost);
    fillBasicStats(stats[i], collectionStats, termStats[i]);
  }
  return stats.length == 1 ? stats[0] : new MultiSimilarity.MultiStats(stats);
}
Project: search    File: SimilarityBase.java
/** Fills all member fields defined in {@code BasicStats} in {@code stats}. 
 *  Subclasses can override this method to fill additional stats. */
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  // #positions(field) must be >= #positions(term)
  assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
  long numberOfDocuments = collectionStats.maxDoc();

  long docFreq = termStats.docFreq();
  long totalTermFreq = termStats.totalTermFreq();

  // codec does not supply totalTermFreq: substitute docFreq
  if (totalTermFreq == -1) {
    totalTermFreq = docFreq;
  }

  final long numberOfFieldTokens;
  final float avgFieldLength;

  long sumTotalTermFreq = collectionStats.sumTotalTermFreq();

  if (sumTotalTermFreq <= 0) {
    // field does not exist;
    // We have to provide something if codec doesnt supply these measures,
    // or if someone omitted frequencies for the field... negative values cause
    // NaN/Inf for some scorers.
    numberOfFieldTokens = docFreq;
    avgFieldLength = 1;
  } else {
    numberOfFieldTokens = sumTotalTermFreq;
    avgFieldLength = (float)numberOfFieldTokens / numberOfDocuments;
  }

  // TODO: add sumDocFreq for field (numberOfFieldPostings)
  stats.setNumberOfDocuments(numberOfDocuments);
  stats.setNumberOfFieldTokens(numberOfFieldTokens);
  stats.setAvgFieldLength(avgFieldLength);
  stats.setDocFreq(docFreq);
  stats.setTotalTermFreq(totalTermFreq);
}
Project: search    File: BM25Similarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: search    File: BM25Similarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  Explanation idf = termStats.length == 1 ? idfExplain(collectionStats, termStats[0]) : idfExplain(collectionStats, termStats);

  float avgdl = avgFieldLength(collectionStats);

  // compute freq-independent part of bm25 equation across all norm values
  float cache[] = new float[256];
  for (int i = 0; i < cache.length; i++) {
    cache[i] = k1 * ((1 - b) + b * decodeNormValue((byte)i) / avgdl);
  }
  return new BM25Stats(collectionStats.field(), idf, queryBoost, avgdl, cache);
}
Project: search    File: PerFieldSimilarityWrapper.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  PerFieldSimWeight weight = new PerFieldSimWeight();
  weight.delegate = get(collectionStats.field());
  weight.delegateWeight = weight.delegate.computeWeight(queryBoost, collectionStats, termStats);
  return weight;
}
Project: search    File: MultiSimilarity.java
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  SimWeight subStats[] = new SimWeight[sims.length];
  for (int i = 0; i < subStats.length; i++) {
    subStats[i] = sims[i].computeWeight(queryBoost, collectionStats, termStats);
  }
  return new MultiStats(subStats);
}
Project: search    File: LMSimilarity.java
/**
 * Computes the collection probability of the current term in addition to the
 * usual statistics.
 */
@Override
protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
  super.fillBasicStats(stats, collectionStats, termStats);
  LMStats lmStats = (LMStats) stats;
  lmStats.setCollectionProbability(collectionModel.computeProbability(stats));
}
Project: search    File: TFIDFSimilarity.java
/**
 * Computes a score factor for a phrase.
 * 
 * <p>
 * The default implementation sums the idf factor for
 * each term in the phrase.
 * 
 * @param collectionStats collection-level statistics
 * @param termStats term-level statistics for the terms in the phrase
 * @return an Explain object that includes both an idf 
 *         score factor for the phrase and an explanation 
 *         for each term.
 */
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
  final long max = collectionStats.maxDoc();
  float idf = 0.0f;
  final Explanation exp = new Explanation();
  exp.setDescription("idf(), sum of:");
  for (final TermStatistics stat : termStats ) {
    final long df = stat.docFreq();
    final float termIdf = idf(df, max);
    exp.addDetail(new Explanation(termIdf, "idf(docFreq=" + df + ", maxDocs=" + max + ")"));
    idf += termIdf;
  }
  exp.setValue(idf);
  return exp;
}
Project: search    File: TFIDFSimilarity.java
@Override
public final SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
  final Explanation idf = termStats.length == 1
  ? idfExplain(collectionStats, termStats[0])
  : idfExplain(collectionStats, termStats);
  return new IDFStats(collectionStats.field(), idf, queryBoost);
}
Project: elasticsearch-vectorize    File: VectorizeService.java
private void processTermVectorsFields(Vectorizer vectorizer, Fields termVectorsFields) throws IOException {
    for (String fieldName : termVectorsFields) {
        TermsEnum termsEnum = termVectorsFields.terms(fieldName).iterator();
        while (termsEnum.next() != null) {
            Term term = new Term(fieldName, termsEnum.term());
            TermStatistics termStatistics = new TermStatistics(termsEnum.term(), termsEnum.docFreq(), termsEnum.totalTermFreq());
            int freq = termsEnum.postings(null, null, PostingsEnum.ALL).freq();
            vectorizer.add(term, termStatistics, freq);
        }
    }
}
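
The Fields instance handed to this method is typically the stored term vector of a single document. A hedged usage sketch, with hypothetical indexReader, vectorizer and docId variables (not project code):

// Term vectors stored for a document come back as a Fields instance,
// which is exactly what processTermVectorsFields iterates over.
Fields termVectors = indexReader.getTermVectors(docId);   // null if no vectors were stored
if (termVectors != null) {
    processTermVectorsFields(vectorizer, termVectors);
}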