Java 类org.apache.lucene.search.similar.MoreLikeThis 实例源码

项目:ephesoft    文件:LuceneEngine.java   
/**
 * Builds the "more like this" query for a single page.
 * <p>
 * When HOCR content is supplied it is fed to {@code moreLikeThis} as a UTF-8 byte stream; otherwise (or when
 * the UTF-8 encoding is reported as unsupported) the page file under {@code actualFolderLocation} is used.
 *
 * @param actualFolderLocation folder containing the page file
 * @param query incoming query; not used for the result, since every path returns a newly built query
 * @param moreLikeThis configured similarity helper that produces the query
 * @param eachPage file name of the page inside {@code actualFolderLocation}
 * @param hocrContent in-memory page content, or {@code null} to read the page file instead
 * @return the query generated by {@code moreLikeThis}
 * @throws IOException if {@code moreLikeThis} fails while reading the stream or the file
 */
private Query addPageToMoreLike(String actualFolderLocation, Query query, MoreLikeThis moreLikeThis, String eachPage,
        String hocrContent) throws IOException {
    // No in-memory content: the page file on disk is the only source.
    if (hocrContent == null) {
        return moreLikeThis.like(new File(actualFolderLocation + File.separator + eachPage));
    }
    try {
        InputStream hocrStream = new ByteArrayInputStream(hocrContent.getBytes("UTF-8"));
        return moreLikeThis.like(hocrStream);
    } catch (UnsupportedEncodingException e) {
        // Encoding problem: log it and fall back to the page file.
        LOGGER.error(e.getMessage(), e);
        return moreLikeThis.like(new File(actualFolderLocation + File.separator + eachPage));
    }
}
项目:ephesoft    文件:LuceneEngine.java   
/**
 * Applies the similarity tuning options to the supplied {@code MoreLikeThis} instance.
 * <p>
 * The numeric options arrive as strings and are parsed as integers; a malformed value raises
 * {@link NumberFormatException} after the preceding options have already been applied.
 *
 * @param minTermFreq minimum term frequency, as a decimal string
 * @param minDocFreq minimum document frequency, as a decimal string
 * @param minWordLength minimum word length, as a decimal string
 * @param maxQueryTerms maximum number of query terms, as a decimal string
 * @param allIndexFields names of the index fields to compare on
 * @param allStopWords stop words to ignore; {@code null} or empty leaves the stop words untouched
 * @param moreLikeThis instance to configure
 */
private void settingMoreLikeParameters(String minTermFreq, String minDocFreq, String minWordLength, String maxQueryTerms,
        String[] allIndexFields, String[] allStopWords, MoreLikeThis moreLikeThis) {
    moreLikeThis.setFieldNames(allIndexFields);
    // Boost settings are not modified here (the setBoost/setBoostFactor calls were disabled).
    moreLikeThis.setMinTermFreq(Integer.parseInt(minTermFreq));
    moreLikeThis.setMinDocFreq(Integer.parseInt(minDocFreq));
    moreLikeThis.setMinWordLen(Integer.parseInt(minWordLength));
    moreLikeThis.setMaxQueryTerms(Integer.parseInt(maxQueryTerms));

    // Nothing to register when no stop words were supplied.
    if (allStopWords == null || allStopWords.length == 0) {
        return;
    }
    Set<String> stopWords = new HashSet<String>();
    for (String stopWord : allStopWords) {
        stopWords.add(stopWord);
    }
    moreLikeThis.setStopWords(stopWords);
}
项目:ephesoft    文件:FuzzyLuceneEngine.java   
/**
 * Creates a {@code MoreLikeThis} over the given reader and configures it from the string-valued options.
 *
 * @param reader index reader the similarity helper works on
 * @param minTermFreq minimum term frequency, as a decimal string
 * @param minDocFreq minimum document frequency, as a decimal string
 * @param minWordLength minimum word length, as a decimal string
 * @param maxQueryTerms maximum number of query terms, as a decimal string
 * @param allStopWords stop words to ignore; {@code null} or empty leaves the stop words untouched
 * @param allIndexFields names of the index fields to compare on
 * @return the configured instance
 */
private MoreLikeThis updateQueryInfo(IndexReader reader, String minTermFreq, String minDocFreq, String minWordLength,
        String maxQueryTerms, String[] allStopWords, String[] allIndexFields) {
    MoreLikeThis similarity = new MoreLikeThis(reader);
    similarity.setFieldNames(allIndexFields);
    similarity.setMinTermFreq(Integer.parseInt(minTermFreq));
    similarity.setMinDocFreq(Integer.parseInt(minDocFreq));
    similarity.setMinWordLen(Integer.parseInt(minWordLength));
    similarity.setMaxQueryTerms(Integer.parseInt(maxQueryTerms));

    boolean hasStopWords = allStopWords != null && allStopWords.length > 0;
    if (hasStopWords) {
        Set<String> ignoredWords = new HashSet<String>();
        for (String word : allStopWords) {
            ignoredWords.add(word);
        }
        similarity.setStopWords(ignoredWords);
    }
    return similarity;
}
项目:opensearchserver    文件:MoreLikeThisRequest.java   
/**
 * Resets this request to its defaults: the superclass defaults first, then empty filter/field lists,
 * no document query or like-text, the {@code MoreLikeThis} library defaults for the term-selection
 * limits, boosting enabled, and a result window starting at 0 with 10 rows.
 */
@Override
protected void setDefaultValues() {
    super.setDefaultValues();

    // Filters, returned fields and the source of the comparison.
    filterList = new FilterList(config);
    returnFieldList = new ReturnFieldList();
    docQuery = null;
    likeText = null;
    lang = LanguageEnum.UNDEFINED;
    analyzerName = null;
    fieldList = new ReturnFieldList();

    // Term-selection limits taken from the MoreLikeThis defaults.
    minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH;
    maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH;
    minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ;
    minTermFreq = MoreLikeThis.DEFAULT_MIN_TERM_FREQ;
    maxNumTokensParsed = MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED;
    maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS;
    boost = true;
    stopWords = null;

    // Result window and the cached query.
    start = 0;
    rows = 10;
    mltQuery = null;
}
项目:elki    文件:LuceneDistanceKNNQuery.java   
/**
 * Constructor.
 * 
 * @param distanceQuery Distance query
 */
public LuceneDistanceKNNQuery(DistanceQuery<DBID> distanceQuery, IndexReader ir, DBIDRange range) {
  super(distanceQuery);
  this.range = range;
  this.mlt = new MoreLikeThis(ir);
  this.is = new IndexSearcher(ir);
  mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_36));
}
项目:elki    文件:LuceneDistanceRangeQuery.java   
/**
 * Constructor.
 * 
 * @param distanceQuery Distance query
 */
public LuceneDistanceRangeQuery(DistanceQuery<DBID> distanceQuery, IndexReader ir, DBIDRange ids) {
  super(distanceQuery);
  this.ids = ids;
  this.mlt = new MoreLikeThis(ir);
  this.is = new IndexSearcher(ir);
  mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_36));
}
项目:webdsl    文件:AbstractEntitySearcher.java   
/**
 * Replaces the current query with a "more like this" query derived from {@code likeText}.
 * <p>
 * The parameters are also recorded, comma-separated, in {@code moreLikeThisParams}. An
 * {@link IOException} while building the query is logged and not rethrown; the reader obtained for the
 * operation is always closed, and the Lucene query is flagged for update in either case.
 *
 * @param likeText text whose similar documents are wanted
 * @param minWordLen minimum word length considered
 * @param maxWordLen maximum word length considered
 * @param minDocFreq minimum document frequency of a term
 * @param maxDocFreqPct maximum document frequency of a term, as a percentage
 * @param minTermFreq minimum term frequency
 * @param maxQueryTerms maximum number of terms in the generated query
 * @return this searcher, for call chaining
 */
@SuppressWarnings( "unchecked" )
public F moreLikeThis(
        String likeText, int minWordLen, int maxWordLen, int minDocFreq, int maxDocFreqPct, int minTermFreq, int maxQueryTerms ) {

    // Remember the exact parameter set in a single comma-separated string.
    moreLikeThisParams = new StringBuilder( )
            .append( likeText ).append( ',' )
            .append( minWordLen ).append( ',' )
            .append( maxWordLen ).append( ',' )
            .append( minDocFreq ).append( ',' )
            .append( maxDocFreqPct ).append( ',' )
            .append( minTermFreq ).append( ',' )
            .append( maxQueryTerms )
            .toString( );

    IndexReader indexReader = getReader( );
    MoreLikeThis similarity = new MoreLikeThis( indexReader );
    similarity.setFieldNames( mltSearchFields );
    similarity.setAnalyzer( analyzer );
    similarity.setMinWordLen( minWordLen );
    similarity.setMaxWordLen( maxWordLen );
    similarity.setMaxDocFreqPct( maxDocFreqPct );
    similarity.setMinDocFreq( minDocFreq );
    similarity.setMinTermFreq( minTermFreq );
    similarity.setMaxQueryTerms( maxQueryTerms );

    try {
        StringReader likeTextReader = new StringReader( likeText );
        currentQD.parsedQuery( similarity.like( likeTextReader ) );
    } catch ( IOException e ) {
        Logger.error(e);
    } finally {
        // The reader is only needed while the query is being derived.
        closeReader( indexReader );
    }

    updateLuceneQuery = true;
    return ( F ) this;
}
项目:xodus    文件:ExodusLuceneTestsBase.java   
/**
 * Recreates the {@code moreLikeThis} helper: closes any previous one, (re)creates the index searcher,
 * then builds a new instance on {@code indexReader} using {@code analyzer}, with both the minimum
 * term frequency and the minimum document frequency set to 1.
 *
 * @throws IOException declared for the close/create helpers this method calls
 */
protected void createMoreLikeThis() throws IOException {
    closeMoreLikeThis();
    createIndexSearcher();

    final MoreLikeThis fresh = new MoreLikeThis(indexReader);
    fresh.setAnalyzer(analyzer);
    fresh.setMinTermFreq(1);
    fresh.setMinDocFreq(1);
    moreLikeThis = fresh;
}
项目:KiraDB    文件:KiraDb.java   
/**
 * Find related (similar) documents based on given value and fields to examine.
 * <p>
 * The index directory, reader and searcher opened for the lookup are always closed before this
 * method returns, including when the search or a document load fails.
 *
 * @param r The Record Object (Document Class); supplies the descriptor and primary key name used to
 *          build the key field name
 * @param testStr The input value to use as the basis for similarity
 * @param fieldNames The names of the fields to examine
 * @param numHits The maximum number of similar documents to retrieve
 * @param excludeDocId Optional "primary key" to exclude from results (may be {@code null})
 *
 * @return The primary keys of the matching records, in search-result order
 *
 * @throws IOException if the index cannot be opened, searched or closed
 * @throws ClassNotFoundException declared by the signature; not raised directly in this method
 * @throws KiraException declared by the signature; not raised directly in this method
 */

public List<String> relatedObjects(Record r, String testStr, String[] fieldNames, int numHits, String excludeDocId) throws IOException, ClassNotFoundException, KiraException {
    String key = makeKey(r.descriptor(), r.getPrimaryKeyName());

    List<String> results = new ArrayList<String>();
    FSDirectory idx = FSDirectory.open(indexDirectory);
    try {
        IndexReader ir = IndexReader.open(idx);
        try {
            IndexSearcher is = new IndexSearcher(idx, true);
            try {
                MoreLikeThis mlt = new MoreLikeThis(ir);

                // Lower some settings so MoreLikeThis will work with very short titles
                mlt.setMinTermFreq(1);
                mlt.setMinDocFreq(1);
                mlt.setMinWordLen(3);
                mlt.setFieldNames(fieldNames);

                Reader reader = new StringReader(testStr);
                org.apache.lucene.search.Query query = mlt.like(reader);

                // Search the index using the query and get at most numHits results
                TopDocs topDocs = is.search(query, numHits);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    String docId = doc.get(key);
                    if (docId != null) {
                        if (excludeDocId == null || !docId.equals(excludeDocId)) {
                            results.add(docId);
                        }
                    } else {
                        // The hit has no value for the key field
                        logger.warning("found other document type? " + doc);
                    }
                }
            } finally {
                // Previously skipped whenever the search or a document load threw
                is.close();
            }
        } finally {
            // Previously never closed
            ir.close();
        }
    } finally {
        // Previously never closed
        idx.close();
    }
    return results;
}
项目:opensearchserver    文件:IndexSingle.java   
/**
 * Returns a {@code MoreLikeThis} obtained from the local reader.
 * <p>
 * The index must be online; the reader is acquired for the duration of the call and released again
 * before returning, whether or not the lookup succeeds.
 * <p>
 * NOTE(review): the returned object is handed to the caller after {@code release(reader)}; confirm it
 * remains usable once the reader has been released.
 *
 * @return the instance supplied by the acquired reader
 * @throws SearchLibException declared by the interface; presumably raised by the online check or the
 *         reader acquisition (confirm)
 */
@Override
public MoreLikeThis getMoreLikeThis() throws SearchLibException {
    checkOnline(true);
    final ReaderLocal localReader = acquire();
    final MoreLikeThis result;
    try {
        result = localReader.getMoreLikeThis();
    } finally {
        release(localReader);
    }
    return result;
}
项目:opensearchserver    文件:ReaderLocal.java   
/**
 * Creates a new {@code MoreLikeThis} bound to this reader's {@code indexReader}.
 *
 * @return a fresh instance on every call
 */
@Override
public MoreLikeThis getMoreLikeThis() {
    final MoreLikeThis similarity = new MoreLikeThis(indexReader);
    return similarity;
}
项目:opensearchserver    文件:ReaderInterface.java   
/**
 * Provides a Lucene {@code MoreLikeThis} helper for this reader.
 *
 * @return a {@code MoreLikeThis} instance; presumably bound to the implementation's underlying index
 *         reader (verify against the implementing classes)
 * @throws SearchLibException if the implementation cannot supply the instance
 */
MoreLikeThis getMoreLikeThis() throws SearchLibException;