Java 类org.apache.lucene.search.similarities.Similarity 实例源码

项目:lucene-custom-query    文件:SeqSpanScorer.java   
/**
 * Builds a scorer whose document iteration is the conjunction of all supplied term postings.
 *
 * @param weight      weight passed to the superclass and also kept in {@code selfWeight}
 * @param postings    per-term postings together with their position
 * @param docScorer   similarity scorer stored for later use
 * @param needsScores flag stored in {@code needsScores}
 * @param matchCost   value stored in {@code matchCost}
 * @throws IOException declared by the signature
 */
SeqSpanScorer(SeqSpanWeight weight, PostingsAndFreq[] postings,
                  Similarity.SimScorer docScorer, boolean needsScores,
                  float matchCost) throws IOException {
  super(weight);
  this.selfWeight = weight;
  this.docScorer = docScorer;
  this.needsScores = needsScores;
  this.matchCost = matchCost;

  final int termCount = postings.length;
  final List<DocIdSetIterator> docIterators = new ArrayList<>();
  final PostingsAndPosition[] positioned = new PostingsAndPosition[termCount];
  for (int i = 0; i < termCount; i++) {
    final PostingsAndFreq current = postings[i];
    docIterators.add(current.postings);
    positioned[i] = new PostingsAndPosition(current.postings, current.position);
  }
  // Only documents present in every term's postings are visited.
  conjunction = ConjunctionDISI.intersectIterators(docIterators);
  this.postings = positioned;
}
项目:lams    文件:ExactPhraseScorer.java   
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                  Similarity.SimScorer docScorer) throws IOException {
  super(weight);
  this.docScorer = docScorer;

  chunkStates = new ChunkState[postings.length];

  endMinus1 = postings.length-1;

  lead = postings[0].postings;
  // min(cost)
  cost = lead.cost();

  for(int i=0;i<postings.length;i++) {
    chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
  }
}
项目:lams    文件:PayloadTermQuery.java   
/**
 * Folds the payload at the current span position into {@code payloadScore} and
 * counts it in {@code payloadsSeen}. Does nothing when the term spans report
 * that no payload is available.
 *
 * @param similarity not read by this implementation
 * @throws IOException declared by the signature
 */
protected void processPayload(Similarity similarity) throws IOException {
  if (!termSpans.isPayloadAvailable()) {
    // zero out the payload?
    return;
  }

  final DocsAndPositionsEnum positions = termSpans.getPostings();
  payload = positions.getPayload();
  // When the postings return no payload, a factor of 1 is passed instead.
  payloadScore = function.currentScore(doc, term.field(),
                                       spans.start(), spans.end(), payloadsSeen, payloadScore,
                                       payload != null
                                           ? docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload)
                                           : 1F);
  payloadsSeen++;
}
项目:search    文件:MemoryIndex.java   
/**
 * Returns the norm values for {@code field}, or {@code null} when the field is
 * unknown or omits norms. The last computed result is cached and reused as long
 * as both the field name and the similarity instance are unchanged.
 */
@Override
public NumericDocValues getNormValues(String field) {
  final FieldInfo fieldInfo = fieldInfos.get(field);
  if (fieldInfo == null || fieldInfo.omitsNorms()) {
    return null;
  }

  NumericDocValues result = cachedNormValues;
  final Similarity currentSim = getSimilarity();
  final boolean cacheHit = field.equals(cachedFieldName) && currentSim == cachedSimilarity;
  if (cacheHit) {
    return result;
  }

  // Not cached: derive the invert state from the field's Info, using defaults when absent.
  final Info info = getInfo(field);
  final int numTokens;
  final int numOverlapTokens;
  final float boost;
  if (info == null) {
    numTokens = 0;
    numOverlapTokens = 0;
    boost = 1.0f;
  } else {
    numTokens = info.numTokens;
    numOverlapTokens = info.numOverlapTokens;
    boost = info.getBoost();
  }
  final FieldInvertState invertState =
      new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
  final long value = currentSim.computeNorm(invertState);
  result = new MemoryIndexNormDocValues(value);

  // Remember the result together with the key it was computed for.
  cachedNormValues = result;
  cachedFieldName = field;
  cachedSimilarity = currentSim;
  if (DEBUG) {
    System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
  }
  return result;
}
项目:search    文件:TestTaxonomyFacetCounts.java   
/**
 * Indexes a single faceted document using a per-field similarity that asserts
 * it is only ever asked for the field named "field".
 */
public void testReallyNoNormsForDrillDown() throws Exception {
  final Directory indexDir = newDirectory();
  final Directory taxonomyDir = newDirectory();

  final IndexWriterConfig writerConfig = newIndexWriterConfig(new MockAnalyzer(random()));
  // Any similarity lookup for a name other than "field" fails the assertion below.
  final PerFieldSimilarityWrapper fieldOnlySimilarity = new PerFieldSimilarityWrapper() {
      final Similarity sim = new DefaultSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    };
  writerConfig.setSimilarity(fieldOnlySimilarity);

  final TaxonomyWriter taxonomyWriter =
      new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE);
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir, writerConfig);
  final FacetsConfig facetsConfig = new FacetsConfig();

  final Document document = new Document();
  document.add(newTextField("field", "text", Field.Store.NO));
  document.add(new FacetField("a", "path"));
  indexWriter.addDocument(facetsConfig.build(taxonomyWriter, document));

  IOUtils.close(indexWriter, taxonomyWriter, indexDir, taxonomyDir);
}
项目:search    文件:ExactPhraseScorer.java   
ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
                  Similarity.SimScorer docScorer) throws IOException {
  super(weight);
  this.docScorer = docScorer;

  chunkStates = new ChunkState[postings.length];

  endMinus1 = postings.length-1;

  lead = postings[0].postings;
  // min(cost)
  cost = lead.cost();

  for(int i=0;i<postings.length;i++) {
    chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
  }
}
项目:search    文件:PayloadTermQuery.java   
/**
 * Updates {@code payloadScore} with the payload found at the current span and
 * increments {@code payloadsSeen}; returns immediately when no payload is
 * available on the term spans.
 *
 * @param similarity unused in this method body
 * @throws IOException declared by the signature
 */
protected void processPayload(Similarity similarity) throws IOException {
  final boolean payloadAvailable = termSpans.isPayloadAvailable();
  if (!payloadAvailable) {
    // zero out the payload?
    return;
  }

  final DocsAndPositionsEnum termPostings = termSpans.getPostings();
  payload = termPostings.getPayload();
  // A null payload is scored with a factor of 1 rather than a computed factor.
  payloadScore = function.currentScore(doc, term.field(),
                                       spans.start(), spans.end(), payloadsSeen, payloadScore,
                                       payload == null
                                           ? 1F
                                           : docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
  payloadsSeen++;
}
项目:search    文件:PayloadHelper.java   
/**
 * Sets up a RAMDirectory (wrapped in a MockDirectoryWrapper) and adds documents
 * (using English.intToEnglish()) with three fields: {@code FIELD}, {@code MULTI_FIELD}
 * and {@code NO_PAYLOAD_FIELD}, analyzing them with the PayloadAnalyzer.
 * The opened reader is kept in {@code reader}.
 *
 * @param random The source of randomness handed to the MockDirectoryWrapper
 * @param similarity The Similarity class to use in the IndexWriter and the Searcher
 * @param numDocs The num docs to add
 * @return An IndexSearcher
 * @throws IOException if indexing or opening the reader fails
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new RAMDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();

  // TODO randomize this
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
      TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  try {
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
      doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + "  " + English.intToEnglish(i), Field.Store.YES));
      doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
      writer.addDocument(doc);
    }
    reader = DirectoryReader.open(writer, true);
  } finally {
    // Close the writer even when indexing or opening the reader throws,
    // so a failed setup does not leave it open.
    writer.close();
  }

  IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
  searcher.setSimilarity(similarity);
  return searcher;
}
项目:search    文件:TestBooleanMinShouldMatch.java   
/**
 * Runs a single-SHOULD-clause query with and without minimumNumberShouldMatch(1)
 * under a custom coord() and hands both result sets to assertSubsetOfSameScores.
 * The searcher's original similarity is restored afterwards.
 */
public void testRewriteCoord1() throws Exception {
  final Similarity previous = s.getSimilarity();
  try {
    s.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        final float denominator = (float) maxOverlap + 1;
        return overlap / denominator;
      }
    });

    final BooleanQuery plain = new BooleanQuery();
    plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);

    final BooleanQuery withMinShouldMatch = new BooleanQuery();
    withMinShouldMatch.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    withMinShouldMatch.setMinimumNumberShouldMatch(1);

    final TopDocs plainHits = s.search(plain, null, 100);
    final TopDocs minShouldMatchHits = s.search(withMinShouldMatch, null, 100);
    assertSubsetOfSameScores(withMinShouldMatch, plainHits, minShouldMatchHits);
  } finally {
    s.setSimilarity(previous);
  }
}
项目:search    文件:TestBooleanMinShouldMatch.java   
/**
 * Runs a single-SHOULD-clause query with and without an extra MUST_NOT clause
 * under a custom coord() and hands both result sets to assertSubsetOfSameScores.
 * The searcher's original similarity is restored afterwards.
 */
public void testRewriteNegate() throws Exception {
  final Similarity previous = s.getSimilarity();
  try {
    s.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        final float denominator = (float) maxOverlap + 1;
        return overlap / denominator;
      }
    });

    final BooleanQuery plain = new BooleanQuery();
    plain.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);

    final BooleanQuery withNegation = new BooleanQuery();
    withNegation.add(new TermQuery(new Term("data", "1")), BooleanClause.Occur.SHOULD);
    withNegation.add(new TermQuery(new Term("data", "Z")), BooleanClause.Occur.MUST_NOT);

    final TopDocs plainHits = s.search(plain, null, 100);
    final TopDocs negatedHits = s.search(withNegation, null, 100);
    assertSubsetOfSameScores(withNegation, plainHits, negatedHits);
  } finally {
    s.setSimilarity(previous);
  }
}
项目:search    文件:TestBoolean2.java   
/**
 * Runs a three-MUST / one-SHOULD boolean query through queriesTest with a
 * custom coord() installed, expecting documents 2 and 3. The searcher's
 * original similarity is restored afterwards.
 */
@Test
public void testQueries10() throws Exception {
  final BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(new TermQuery(new Term(field, "w3")), BooleanClause.Occur.MUST);
  booleanQuery.add(new TermQuery(new Term(field, "xx")), BooleanClause.Occur.MUST);
  booleanQuery.add(new TermQuery(new Term(field, "w2")), BooleanClause.Occur.MUST);
  booleanQuery.add(new TermQuery(new Term(field, "zz")), BooleanClause.Occur.SHOULD);

  final int[] expectedDocs = {2, 3};
  final Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new DefaultSimilarity() {
      @Override
      public float coord(int overlap, int maxOverlap) {
        final float denominator = (float) maxOverlap - 1;
        return overlap / denominator;
      }
    });
    queriesTest(booleanQuery, expectedDocs);
  } finally {
    searcher.setSimilarity(previous);
  }
}
项目:search    文件:TestNorms.java   
/**
 * Fills {@code dir} with at least 100 line-file documents, each temporarily
 * extended with a boosted {@code byteTestField} whose text is the boost value.
 * The index is written with a {@code MySimProvider} similarity.
 *
 * @param dir directory receiving the index
 * @throws IOException declared by the signature
 */
public void buildIndex(Directory dir) throws IOException {
  final Random rnd = random();
  final MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
  mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

  final IndexWriterConfig writerConfig = newIndexWriterConfig(mockAnalyzer);
  final Similarity simProvider = new MySimProvider();
  writerConfig.setSimilarity(simProvider);

  final RandomIndexWriter indexWriter = new RandomIndexWriter(rnd, dir, writerConfig);
  final LineFileDocs lineDocs = new LineFileDocs(rnd, defaultCodecSupportsDocValues());
  final int docCount = atLeast(100);
  for (int remaining = docCount; remaining > 0; remaining--) {
    final Document document = lineDocs.nextDoc();
    final int boost = random().nextInt(255);
    final Field boostedField = new TextField(byteTestField, "" + boost, Field.Store.YES);
    boostedField.setBoost(boost);
    document.add(boostedField);
    indexWriter.addDocument(document);
    // The field is removed again once the document has been added to the writer.
    document.removeField(byteTestField);
    if (rarely()) {
      indexWriter.commit();
    }
  }
  indexWriter.commit();
  indexWriter.close();
  lineDocs.close();
}
项目:search    文件:IndexSchema.java   
/**
 * Instantiates the class named by the node's {@code SimilarityFactory.CLASS_NAME}
 * attribute and returns it as a {@link SimilarityFactory}.
 *
 * @param loader resource loader used to instantiate the configured class
 * @param node   configuration node; may be {@code null}
 * @return {@code null} when {@code node} is {@code null}; otherwise the initialized
 *         factory, or a wrapper factory around a non-factory instance
 */
static SimilarityFactory readSimilarity(SolrResourceLoader loader, Node node) {
  if (node == null) {
    return null;
  }

  final String className = ((Element) node).getAttribute(SimilarityFactory.CLASS_NAME);
  final Object instance = loader.newInstance(className, Object.class, "search.similarities.");

  if (!(instance instanceof SimilarityFactory)) {
    // just like always, assume it's a Similarity and get a ClassCastException - reasonable error handling
    return new SimilarityFactory() {
      @Override
      public Similarity getSimilarity() {
        return (Similarity) instance;
      }
    };
  }

  // configure a factory, get a similarity back
  final NamedList<Object> factoryArgs = DOMUtil.childNodesToNamedList(node);
  factoryArgs.add(SimilarityFactory.CLASS_NAME, className);
  final SolrParams factoryParams = SolrParams.toSolrParams(factoryArgs);
  final SimilarityFactory factory = (SimilarityFactory) instance;
  factory.init(factoryParams);
  return factory;
}
项目:biospectra    文件:Classifier.java   
/**
 * Stores the classifier settings, opens the index at {@code indexPath} and
 * creates the searcher, applying {@code similarity} when one is given.
 * Also sets the global BooleanQuery max clause count to 10000.
 *
 * @throws IllegalArgumentException if {@code indexPath} does not exist or is not a directory
 * @throws Exception declared by the signature
 */
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer, double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm, Similarity similarity) throws Exception {
    final boolean usableDirectory = indexPath.exists() && indexPath.isDirectory();
    if (!usableDirectory) {
        throw new IllegalArgumentException("indexPath is not a directory or does not exist");
    }

    this.indexPath = indexPath;
    this.kmerSize = kmerSize;
    this.kmerSkips = kmerSkips;
    this.minStrandKmer = minStrandKmer;
    this.queryAnalyzer = new KmerQueryAnalyzer(this.kmerSize, this.kmerSkips, this.minStrandKmer);

    final Directory indexDirectory = new MMapDirectory(this.indexPath.toPath());
    this.indexReader = DirectoryReader.open(indexDirectory);
    this.indexSearcher = new IndexSearcher(this.indexReader);
    // Without an explicit similarity the searcher is left as constructed.
    if (similarity != null) {
        this.indexSearcher.setSimilarity(similarity);
    }

    this.minShouldMatch = minShouldMatch;
    this.queryGenerationAlgorithm = queryGenerationAlgorithm;

    BooleanQuery.setMaxClauseCount(10000);
}
项目:elasticsearch_my    文件:HasChildQueryBuilder.java   
/**
 * Captures everything given to this query; the constructor only stores its arguments.
 *
 * @param toQuery                   stored in {@code toQuery}
 * @param innerQuery                stored in {@code innerQuery}
 * @param minChildren               stored in {@code minChildren}
 * @param maxChildren               stored in {@code maxChildren}
 * @param parentType                stored in {@code parentType}
 * @param scoreMode                 stored in {@code scoreMode}
 * @param parentChildIndexFieldData stored in {@code parentChildIndexFieldData}
 * @param similarity                stored in {@code similarity}
 */
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren,
                 String parentType, ScoreMode scoreMode, ParentChildIndexFieldData parentChildIndexFieldData,
                 Similarity similarity) {
    // Queries.
    this.toQuery = toQuery;
    this.innerQuery = innerQuery;

    // Child-count bounds.
    this.minChildren = minChildren;
    this.maxChildren = maxChildren;

    // Join and scoring configuration.
    this.parentType = parentType;
    this.parentChildIndexFieldData = parentChildIndexFieldData;
    this.scoreMode = scoreMode;
    this.similarity = similarity;
}
项目:elasticsearch_my    文件:EngineConfig.java   
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}.
 * <p>
 * Apart from the cases below, every argument is stored as given:
 * a {@code null} warmer is replaced by a no-op, the codec name is read from
 * {@code indexSettings}, and the indexing buffer is fixed at 256 MB.
 *
 * @throws IllegalArgumentException if {@code openMode} is {@code null}
 */
public EngineConfig(OpenMode openMode, ShardId shardId, ThreadPool threadPool,
                    IndexSettings indexSettings, Engine.Warmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy,
                    MergePolicy mergePolicy, Analyzer analyzer,
                    Similarity similarity, CodecService codecService, Engine.EventListener eventListener,
                    TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache, QueryCachingPolicy queryCachingPolicy,
                    TranslogConfig translogConfig, TimeValue flushMergesAfter, ReferenceManager.RefreshListener refreshListeners,
                    long maxUnsafeAutoIdTimestamp) {
    if (openMode == null) {
        throw new IllegalArgumentException("openMode must not be null");
    }
    this.openMode = openMode;

    // Shard identity and shared infrastructure.
    this.shardId = shardId;
    this.indexSettings = indexSettings;
    this.threadPool = threadPool;
    this.store = store;

    // A missing warmer is substituted with one that does nothing.
    if (warmer == null) {
        this.warmer = (a) -> {};
    } else {
        this.warmer = warmer;
    }

    // Lucene-level configuration.
    this.deletionPolicy = deletionPolicy;
    this.mergePolicy = mergePolicy;
    this.analyzer = analyzer;
    this.similarity = similarity;
    this.codecService = codecService;
    this.eventListener = eventListener;
    codecName = indexSettings.getValue(INDEX_CODEC_SETTING);
    // We give IndexWriter a "huge" (256 MB) buffer, so it won't flush on its own unless the ES indexing buffer is also huge and/or
    // there are not too many shards allocated to this node.  Instead, IndexingMemoryController periodically checks
    // and refreshes the most heap-consuming shards when total indexing heap usage across all shards is too high:
    indexingBufferSize = new ByteSizeValue(256, ByteSizeUnit.MB);

    // Translog, caching and refresh collaborators.
    this.translogRecoveryPerformer = translogRecoveryPerformer;
    this.translogConfig = translogConfig;
    this.queryCache = queryCache;
    this.queryCachingPolicy = queryCachingPolicy;
    this.flushMergesAfter = flushMergesAfter;
    this.refreshListeners = refreshListeners;

    assert maxUnsafeAutoIdTimestamp >= IndexRequest.UNSET_AUTO_GENERATED_TIMESTAMP :
        "maxUnsafeAutoIdTimestamp must be >= -1 but was " + maxUnsafeAutoIdTimestamp;
    this.maxUnsafeAutoIdTimestamp = maxUnsafeAutoIdTimestamp;
}
项目:elasticsearch_my    文件:IndexModuleTests.java   
/**
 * Registers a custom "test_similarity" type on the index module and checks that
 * the similarity configured as "my_similarity" is built from it, carrying the
 * configured name and key.
 */
public void testAddSimilarity() throws IOException {
    final String similarityName = "my_similarity";
    final Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.similarity.my_similarity.type", "test_similarity")
            .put("index.similarity.my_similarity.key", "there is a key")
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
    final IndexModule indexModule = new IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings),
            new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()));

    // The provider echoes the name it was created with and builds a TestSimilarity from the "key" setting.
    indexModule.addSimilarity("test_similarity", (providerName, providerSettings, indexLevelSettings) -> new SimilarityProvider() {
        @Override
        public String name() {
            return providerName;
        }

        @Override
        public Similarity get() {
            return new TestSimilarity(providerSettings.get("key"));
        }
    });

    final IndexService service = newIndexService(indexModule);
    final SimilarityService similarities = service.similarityService();
    assertNotNull(similarities.getSimilarity(similarityName));
    assertTrue(similarities.getSimilarity(similarityName).get() instanceof TestSimilarity);
    assertEquals("my_similarity", similarities.getSimilarity(similarityName).name());
    assertEquals("there is a key", ((TestSimilarity) similarities.getSimilarity(similarityName).get()).key);
    service.close("simon says", false);
}
项目:elasticsearch_my    文件:HasChildQueryBuilderTests.java   
/**
 * Builds a has_child query on "custom_string" and checks that the similarity the
 * resulting query exposes for that field has the same class as the built-in
 * similarity registered under {@code similarity}.
 */
public void testNonDefaultSimilarity() throws Exception {
    final QueryShardContext context = createShardContext();
    final HasChildQueryBuilder builder =
        QueryBuilders.hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
    final HasChildQueryBuilder.LateParsingQuery lateQuery =
        (HasChildQueryBuilder.LateParsingQuery) builder.toQuery(context);

    final Similarity expected = SimilarityService.BUILT_IN.get(similarity)
        .apply(similarity, Settings.EMPTY, Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build())
        .get();

    final PerFieldSimilarityWrapper perField = (PerFieldSimilarityWrapper) lateQuery.getSimilarity();
    assertThat(perField.get("custom_string"), instanceOf(expected.getClass()));
}
项目:lams    文件:SpanScorer.java   
/**
 * Creates a scorer over {@code spans} and immediately advances the spans once.
 *
 * @param spans     spans to score; {@code next()} is called on them here
 * @param weight    weight forwarded to the superclass
 * @param docScorer scorer stored in {@code docScorer}
 * @throws IOException declared by the signature
 */
protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer)
throws IOException {
  super(weight);
  this.spans = spans;
  this.docScorer = docScorer;

  // No current document yet; 'more' records the result of the first advance.
  doc = -1;
  more = spans.next();
}
项目:lams    文件:SloppyPhraseScorer.java   
/**
 * Creates a sloppy phrase scorer over the given term postings and links one
 * {@code PhrasePositions} per term into a cyclic list.
 *
 * @param weight    weight forwarded to the superclass
 * @param postings  postings of the phrase terms; must be non-null and non-empty
 * @param slop      value stored in {@code slop}
 * @param docScorer scorer stored in {@code docScorer}
 * @throws NullPointerException     if {@code postings} is {@code null}
 * @throws IllegalArgumentException if {@code postings} is empty
 */
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
    int slop, Similarity.SimScorer docScorer) {
  super(weight);
  // Validate up front: the code below unconditionally reads postings.length and postings[0],
  // so a null or empty array could never have produced a usable scorer.
  if (postings == null) {
    throw new NullPointerException("postings must not be null");
  }
  if (postings.length == 0) {
    throw new IllegalArgumentException("postings must contain at least one term");
  }
  this.docScorer = docScorer;
  this.slop = slop;
  this.numPostings = postings.length;
  pq = new PhraseQueue(postings.length);
  // min(cost)
  cost = postings[0].postings.cost();
  // convert tps to a list of phrase positions.
  // note: phrase-position differs from term-position in that its position
  // reflects the phrase offset: pp.pos = tp.pos - offset.
  // this allows to easily identify a matching (exact) phrase
  // when all PhrasePositions have exactly the same position.
  min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
  max = min;
  max.doc = -1;
  for (int i = 1; i < postings.length; i++) {
    PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
    max.next = pp;
    max = pp;
    max.doc = -1;
  }
  max.next = min; // make it cyclic for easier manipulation
}
项目:lams    文件:IndexWriterConfig.java   
/**
 * Expert: set the {@link Similarity} implementation used by this IndexWriter.
 * <p>
 * <b>NOTE:</b> the similarity cannot be null.
 *
 * <p>Only takes effect when IndexWriter is first created.
 *
 * @param similarity the similarity to use; must not be {@code null}
 * @return this config, to allow chaining
 * @throws IllegalArgumentException if {@code similarity} is {@code null}
 */
public IndexWriterConfig setSimilarity(Similarity similarity) {
  if (similarity != null) {
    this.similarity = similarity;
    return this;
  }
  throw new IllegalArgumentException("similarity must not be null");
}
项目:Elasticsearch    文件:SimilarityService.java   
/**
 * Resolves the default and base similarities from the lookup service and builds
 * the similarity exposed by this service.
 *
 * @param index                   index forwarded to the superclass
 * @param indexSettingsService    source of the settings forwarded to the superclass
 * @param similarityLookupService lookup used to resolve the named similarities
 * @param mapperService           mapper service; when {@code null} the default similarity is used directly
 */
@Inject
public SimilarityService(Index index, IndexSettingsService indexSettingsService,
                         final SimilarityLookupService similarityLookupService, final MapperService mapperService) {
    super(index, indexSettingsService.getSettings());
    this.similarityLookupService = similarityLookupService;
    this.mapperService = mapperService;

    final Similarity defaultSimilarity =
            similarityLookupService.similarity(SimilarityLookupService.DEFAULT_SIMILARITY).get();

    // Expert users can configure the base type as being different to default, but out-of-box we use default.
    final Similarity baseSimilarity;
    if (similarityLookupService.similarity("base") == null) {
        baseSimilarity = defaultSimilarity;
    } else {
        baseSimilarity = similarityLookupService.similarity("base").get();
    }

    if (mapperService == null) {
        this.perFieldSimilarity = defaultSimilarity;
    } else {
        this.perFieldSimilarity = new PerFieldSimilarity(defaultSimilarity, baseSimilarity, mapperService);
    }
}
项目:Elasticsearch    文件:HasChildQueryParser.java   
/**
 * Builds a {@code LateParsingQuery} from the given arguments, translating the
 * {@code ScoreType} to a {@code ScoreMode} and treating {@code maxChildren == 0}
 * as unbounded.
 *
 * @throws UnsupportedOperationException if {@code scoreType} has no matching score mode
 * @throws IOException declared by the signature
 */
public static Query joinUtilHelper(String parentType, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity, Query toQuery, ScoreType scoreType, Query innerQuery, int minChildren, int maxChildren) throws IOException {
    final ScoreMode scoreMode = scoreModeFor(scoreType);
    // 0 in pre 2.x p/c impl means unbounded
    final int childLimit = (maxChildren == 0) ? Integer.MAX_VALUE : maxChildren;
    return new LateParsingQuery(toQuery, innerQuery, minChildren, childLimit, parentType, scoreMode, parentChildIndexFieldData, similarity);
}

/** Maps a {@code ScoreType} onto the corresponding {@code ScoreMode}. */
// TODO: move entirely over from ScoreType to org.apache.lucene.join.ScoreMode, when we drop the 1.x parent child code.
private static ScoreMode scoreModeFor(ScoreType scoreType) {
    switch (scoreType) {
        case NONE:
            return ScoreMode.None;
        case MIN:
            return ScoreMode.Min;
        case MAX:
            return ScoreMode.Max;
        case SUM:
            return ScoreMode.Total;
        case AVG:
            return ScoreMode.Avg;
        default:
            throw new UnsupportedOperationException("score type [" + scoreType + "] not supported");
    }
}
项目:Elasticsearch    文件:HasChildQueryParser.java   
/**
 * Stores all arguments in the fields of the same name; no other work is done here.
 *
 * @param toQuery                   stored in {@code toQuery}
 * @param innerQuery                stored in {@code innerQuery}
 * @param minChildren               stored in {@code minChildren}
 * @param maxChildren               stored in {@code maxChildren}
 * @param parentType                stored in {@code parentType}
 * @param scoreMode                 stored in {@code scoreMode}
 * @param parentChildIndexFieldData stored in {@code parentChildIndexFieldData}
 * @param similarity                stored in {@code similarity}
 */
LateParsingQuery(Query toQuery, Query innerQuery, int minChildren, int maxChildren, String parentType, ScoreMode scoreMode, ParentChildIndexFieldData parentChildIndexFieldData, Similarity similarity) {
    // Join configuration.
    this.parentType = parentType;
    this.parentChildIndexFieldData = parentChildIndexFieldData;

    // Queries.
    this.toQuery = toQuery;
    this.innerQuery = innerQuery;

    // Matching bounds and scoring.
    this.minChildren = minChildren;
    this.maxChildren = maxChildren;
    this.scoreMode = scoreMode;
    this.similarity = similarity;
}
项目:Elasticsearch    文件:EngineConfig.java   
/**
 * Creates a new {@link org.elasticsearch.index.engine.EngineConfig}.
 * <p>
 * Collaborators are stored as given; the remaining fields are read from
 * {@code indexSettings}, and {@code updateVersionMapSize()} is invoked once the
 * version-map setting and the indexing buffer size have been assigned.
 */
public EngineConfig(ShardId shardId, ThreadPool threadPool, ShardIndexingService indexingService,
                    Settings indexSettings, IndicesWarmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy,
                    MergePolicy mergePolicy, MergeSchedulerConfig mergeSchedulerConfig, Analyzer analyzer,
                    Similarity similarity, CodecService codecService, Engine.FailedEngineListener failedEngineListener,
                    TranslogRecoveryPerformer translogRecoveryPerformer, QueryCache queryCache, QueryCachingPolicy queryCachingPolicy, IndexSearcherWrappingService wrappingService, TranslogConfig translogConfig) {
    // Shard identity and shared services.
    this.shardId = shardId;
    this.threadPool = threadPool;
    this.indexSettings = indexSettings;
    this.indexingService = indexingService;
    this.warmer = warmer;
    this.store = store;
    this.wrappingService = wrappingService;
    this.failedEngineListener = failedEngineListener;

    // Lucene-level configuration.
    this.deletionPolicy = deletionPolicy;
    this.mergePolicy = mergePolicy;
    this.mergeSchedulerConfig = mergeSchedulerConfig;
    this.analyzer = analyzer;
    this.similarity = similarity;
    this.codecService = codecService;

    // Values derived from the index settings.
    this.optimizeAutoGenerateId = indexSettings.getAsBoolean(EngineConfig.INDEX_OPTIMIZE_AUTOGENERATED_ID_SETTING, false);
    this.compoundOnFlush = indexSettings.getAsBoolean(EngineConfig.INDEX_COMPOUND_ON_FLUSH, compoundOnFlush);
    codecName = indexSettings.get(EngineConfig.INDEX_CODEC_SETTING, EngineConfig.DEFAULT_CODEC_NAME);
    // We start up inactive and rely on IndexingMemoryController to give us our fair share once we start indexing:
    indexingBufferSize = IndexingMemoryController.INACTIVE_SHARD_INDEXING_BUFFER;
    gcDeletesInMillis = indexSettings.getAsTime(INDEX_GC_DELETES_SETTING, EngineConfig.DEFAULT_GC_DELETES).millis();
    versionMapSizeSetting = indexSettings.get(INDEX_VERSION_MAP_SIZE, DEFAULT_VERSION_MAP_SIZE);
    updateVersionMapSize();

    // Translog and query-cache collaborators.
    this.translogRecoveryPerformer = translogRecoveryPerformer;
    this.translogConfig = translogConfig;
    this.forceNewTranslog = indexSettings.getAsBoolean(INDEX_FORCE_NEW_TRANSLOG, false);
    this.queryCache = queryCache;
    this.queryCachingPolicy = queryCachingPolicy;
}
项目:ir-generalized-translation-models    文件:AugmentedTermScorer.java   
/**
 * Construct an <code>query.{@link AugmentedTermScorer}</code>.
 *
 * @param weight
 *          The weight of the <code>Term</code> in the query.
 * @param mainTerm
 *          An iterator over the documents matching the main <code>Term</code>.
 * @param similarPostings
 *          A list of <code>PostingsEnumWeightTuple</code>: term iterator, weight pairs
 * @param docScorer
 *          The <code>Similarity.SimScorer</code> implementation
 *          to be used for score computations.
 */
public AugmentedTermScorer(Weight weight, PostingsEnum mainTerm, List<PostingsEnumWeightTuple> similarPostings, Similarity.SimScorer docScorer) {
    super(weight);

    this.postings = new PostingsEnumWeightTuple[similarPostings.size() + 1];
    this.postings[0] = new PostingsEnumWeightTuple(mainTerm,1f);
    for (int i = 0; i < similarPostings.size(); i++) {
        this.postings[i + 1] = similarPostings.get(i);
    }

    this.iterator = new MultiDocIdSetIterator(this.postings);

    this.docScorer = docScorer;
}
项目:linden    文件:LindenSimilarityFactory.java   
/**
 * Creates a {@code LindenSimilarity} from the "dict" parameter and, when a
 * non-empty "norm" parameter is present, its value parsed as a float.
 *
 * @param params factory parameters; the keys "dict" and "norm" are read
 * @return the configured similarity
 * @throws IOException declared by the signature
 */
@Override
public Similarity getInstance(Map<String, String> params) throws IOException {
  final String dictSetting = params.get("dict");
  final String normSetting = params.get("norm");

  if (Strings.isNullOrEmpty(normSetting)) {
    return new LindenSimilarity(IDFManager.createInstance(dictSetting));
  }
  return new LindenSimilarity(IDFManager.createInstance(dictSetting), Float.parseFloat(normSetting));
}
项目:linden    文件:TermDocsEnum.java   
/**
 * Stores the term, its postings and the scoring inputs; no I/O happens in this constructor body.
 *
 * @param term      stored in {@code term}
 * @param docFreq   stored in {@code docFreq}
 * @param postings  stored in {@code postings}
 * @param docScorer stored in {@code docScorer}
 * @param termPos   stored in {@code termPos}
 * @throws IOException declared by the signature
 */
public TermDocsEnum(FlexibleQuery.FlexibleTerm term, int docFreq, DocsAndPositionsEnum postings, Similarity.SimScorer docScorer, int termPos) throws IOException {
  // Term identity.
  this.term = term;
  this.termPos = termPos;
  this.docFreq = docFreq;

  // Iteration and scoring.
  this.postings = postings;
  this.docScorer = docScorer;
}
项目:linden    文件:TermDocsEnum.java   
/**
 * Explains the score of the current document for this term.
 *
 * @param similarity similarity whose simple class name is shown in the description
 * @param query      query shown in the description
 * @return a matching explanation wrapping the scorer's explanation, or
 *         {@code null} when {@code isMatched()} is false
 */
public Explanation explain(Similarity similarity, Query query) {
  if (!isMatched()) {
    return null;
  }

  final ComplexExplanation explanation = new ComplexExplanation();
  explanation.setDescription("weight("+query+" in "+ doc +") [" + similarity.getClass().getSimpleName() + "], result of:");

  // The scorer's own explanation supplies both the detail and the value.
  final Explanation scoreDetail = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
  explanation.addDetail(scoreDetail);
  explanation.setValue(scoreDetail.getValue());
  explanation.setMatch(true);
  return explanation;
}
项目:elasticsearch-learning-to-rank    文件:LtrQueryTests.java   
/**
 * Picks one similarity at random from a fixed list, indexes every entry of
 * {@code docs} with it (fields "id" and "field"), force-merges to one segment
 * and opens a searcher that uses the same similarity.
 */
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    final List<Similarity> candidates = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelBE(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
        );
    similarity = candidates.get(random().nextInt(candidates.size()));

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
    int docId = 0;
    while (docId < docs.length) {
        final Document document = new Document();
        document.add(newStringField("id", "" + docId, Field.Store.YES));
        document.add(newField("field", docs[docId], Store.YES));
        indexWriterUnderTest.addDocument(document);
        docId++;
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
项目:xltsearch    文件:Config.java   
/**
 * Resolves the configuration step by step (hash algorithm, Lucene version,
 * analyzer, similarity, directory). Stops at the first value that is missing,
 * leaving {@code resolved} false; fields assigned before that point keep their
 * new values. Sets {@code resolved} to true only when every step succeeded.
 */
void resolve() {
    // Nothing to do when already resolved; also bail out when the
    // last-updated marker equals INDEX_INVALIDATED.
    if (resolved || getLastUpdated() == INDEX_INVALIDATED) {
        return;
    }

    // hashAlgorithm
    hashAlgorithm = get("hash.algorithm");
    if (hashAlgorithm == null) {
        return;
    }

    // version
    version = get("lucene.version");
    if (version == null) {
        return;
    }

    // analyzer
    final Function<Version,Analyzer> makeAnalyzer = get("lucene.analyzer");
    if (makeAnalyzer == null) {
        return;
    }
    analyzer = makeAnalyzer.apply(version);

    // similarity
    final Supplier<Similarity> makeSimilarity = get("scoring.model");
    if (makeSimilarity == null) {
        return;
    }
    similarity = makeSimilarity.get();

    // directory
    final Function<File,Directory> makeDirectory = get("directory.type");
    if (makeDirectory == null) {
        return;
    }
    final File indexLocation = new File(configDir.getPath() + File.separator + INDEX_DIR);
    directory = makeDirectory.apply(indexLocation);
    if (directory == null) {
        return;
    }

    // we made it: config is properly resolved
    resolved = true;
}
项目:DoSeR-Disambiguation    文件:LearnToRankFuzzyQuery.java   
/**
 * Create a new FuzzyQuery that will match terms with an edit distance of at
 * most <code>maxEdits</code> to <code>term</code>. If a
 * <code>prefixLength</code> &gt; 0 is specified, a common prefix of that
 * length is also required.
 * 
 * @param term
 *            the term to search for
 * @param maxEdits
 *            must be >= 0 and <=
 *            {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
 * @param prefixLength
 *            length of common (non-fuzzy) prefix
 * @param maxExpansions
 *            the maximum number of terms to match. If this number is
 *            greater than {@link BooleanQuery#getMaxClauseCount} when the
 *            query is rewritten, then the maxClauseCount will be used
 *            instead.
 * @param transpositions
 *            true if transpositions should be treated as a primitive edit
 *            operation. If this is false, comparisons will implement the
 *            classic Levenshtein algorithm.
 */
public LearnToRankFuzzyQuery(final Term term, final int maxEdits,
        final int prefixLength, final int maxExpansions,
        final boolean transpositions, final Similarity sim) {
    super(term.field());

    if ((maxEdits < 0)
            || (maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE)) {
        throw new IllegalArgumentException(
                "maxEdits must be between 0 and "
                        + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
    }
    if (prefixLength < 0) {
        throw new IllegalArgumentException(
                "prefixLength cannot be negative.");
    }
    if (maxExpansions < 0) {
        throw new IllegalArgumentException(
                "maxExpansions cannot be negative.");
    }

    this.term = term;
    this.maxEdits = maxEdits;
    this.prefixLength = prefixLength;
    this.transpositions = transpositions;
    this.maxExpansions = maxExpansions;
    setRewriteMethod(new LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
            maxExpansions, sim));
    // setRewriteMethod(new
    // LearnToRankFuzzyQuery.LTRTopTermsScoringBooleanQueryRewrite(
    // maxExpansions));
}
项目:DoSeR-Disambiguation    文件:LearnToRankTermQuery.java   
/**
 * Expert: constructs a TermQuery that will use the provided docFreq instead
 * of looking up the docFreq against the searcher.
 *
 * @param term    the term to query
 * @param docFreq document frequency stored as given
 * @param sim     similarity stored in {@code sim}
 */
public LearnToRankTermQuery(final Term term, final int docFreq,
        final Similarity sim) {
    this.term = term;
    this.sim = sim;
    this.docFreq = docFreq;
    // No per-reader term state is supplied to this constructor.
    perReaderTermS = null;
}
项目:DoSeR-Disambiguation    文件:LearnToRankTermQuery.java   
/**
 * Expert: constructs a TermQuery that takes its docFreq from the provided
 * term states instead of looking up the docFreq against the searcher.
 *
 * @param term   the term to query
 * @param states term states; asserted non-null, kept in {@code perReaderTermS}
 *               and used as the source of {@code docFreq}
 * @param sim    similarity stored in {@code sim}
 */
public LearnToRankTermQuery(final Term term, final TermContext states,
        final Similarity sim) {
    assert states != null;
    this.term = term;
    this.sim = sim;
    perReaderTermS = states;
    docFreq = states.docFreq();
}
项目:DoSeR-Disambiguation    文件:LuceneFeatures.java   
/**
 * Splits {@code str} on single spaces and combines one lower-cased
 * (Locale.US) {@code LearnToRankTermQuery} per piece into a boolean query.
 *
 * @param str       text to split into terms
 * @param field     field the terms are searched in
 * @param sim       similarity passed to every term query
 * @param occ       occurrence used for every clause
 * @param maxclause not read by this method
 * @return the combined boolean query
 */
public static Query queryStringTerm(String str, String field,
        Similarity sim, Occur occ, int maxclause) {

    final String[] tokens = str.split(" ");
    final LTRBooleanQuery combined = new LTRBooleanQuery();
    for (int i = 0; i < tokens.length; i++) {
        final Term term = new Term(field, tokens[i].toLowerCase(Locale.US));
        combined.add(new LearnToRankTermQuery(term, sim), occ);
    }
    return combined;
}
项目:DoSeR-Disambiguation    文件:LuceneFeatures.java   
/**
 * Splits {@code str} on single spaces and combines one lower-cased
 * (Locale.US) {@code LearnToRankFuzzyQuery} per piece into a boolean query.
 *
 * @param str       text to split into terms
 * @param field     field the terms are searched in
 * @param sim       similarity passed to every fuzzy query
 * @param occ       occurrence used for every clause
 * @param maxclause not read by this method
 * @return the combined boolean query
 */
public static Query queryStringFuzzy(String str, String field,
        Similarity sim, Occur occ, int maxclause) {

    final String[] tokens = str.split(" ");
    final LTRBooleanQuery combined = new LTRBooleanQuery();
    for (int i = 0; i < tokens.length; i++) {
        final Term term = new Term(field, tokens[i].toLowerCase(Locale.US));
        combined.add(new LearnToRankFuzzyQuery(term, sim), occ);
    }
    return combined;
}
项目:DoSeR-Disambiguation    文件:AbstractKnowledgeBase.java   
/**
 * Opens the index at {@code uri} and creates the {@code SearcherManager} for it.
 * An {@link IOException} is logged rather than propagated, in which case
 * {@code manager} is left unassigned by this constructor.
 *
 * @param uri     file-system location of the index, stored in {@code indexUri}
 * @param dynamic stored in {@code dynamic}
 * @param sim     not read by this constructor
 */
AbstractKnowledgeBase(String uri, boolean dynamic, Similarity sim) {
    super();
    this.indexUri = uri;
    this.dynamic = dynamic;

    File indexDir = new File(indexUri);
    Directory dir = null;
    try {
        dir = FSDirectory.open(indexDir);
        this.manager = new SearcherManager(dir, new SearcherFactory());
    } catch (IOException e) {
        logger.error("IOException in "+AbstractKnowledgeBase.class.getName(), e);
        // The directory was opened but no SearcherManager took it over:
        // close it here so the failed construction does not leak it.
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException closeFailure) {
                logger.error("Failed to close index directory " + indexUri, closeFailure);
            }
        }
    }
}
项目:search    文件:IDFValueSource.java   
/**
 * Unwraps any {@code PerFieldSimilarityWrapper} layers for {@code field} and
 * returns the result as a {@code TFIDFSimilarity}.
 *
 * @param sim   similarity to inspect
 * @param field field name used to unwrap per-field wrappers
 * @return the unwrapped similarity, or {@code null} when it is not a {@code TFIDFSimilarity}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity resolved = sim;
  while (resolved instanceof PerFieldSimilarityWrapper) {
    resolved = ((PerFieldSimilarityWrapper) resolved).get(field);
  }
  return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
项目:search    文件:TestValueSources.java   
/**
 * Checks the hits of an {@code IDFValueSource} function query under
 * {@code DefaultSimilarity}; the searcher's similarity is restored afterwards.
 */
public void testIDF() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new DefaultSimilarity());
    final IDFValueSource idfSource =
        new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    final float[] expectedScores = new float[] { 0.5945349f, 0.5945349f };
    assertHits(new FunctionQuery(idfSource), expectedScores);
  } finally {
    searcher.setSimilarity(previous);
  }
}
项目:search    文件:TestValueSources.java   
/**
 * Checks the hits of a {@code NormValueSource} function query on the "byte"
 * field under {@code DefaultSimilarity}; the searcher's similarity is restored
 * afterwards.
 */
public void testNorm() throws Exception {
  final Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new DefaultSimilarity());
    final NormValueSource normSource = new NormValueSource("byte");
    final float[] expectedScores = new float[] { 0f, 0f };
    assertHits(new FunctionQuery(normSource), expectedScores);
  } finally {
    searcher.setSimilarity(previous);
  }
}