Java 类org.apache.lucene.search.spell.NGramDistance 实例源码

项目:elasticsearch_my    文件:DirectCandidateGeneratorBuilder.java   
/**
 * Maps a user-supplied distance name onto a {@link StringDistance} implementation.
 *
 * <p>The name is lower-cased with {@link Locale#US} before it is compared, so matching is
 * case-insensitive and every literal below must itself be lower-case. Accepted names:
 * <ul>
 *   <li>{@code internal}</li>
 *   <li>{@code damerau_levenshtein} or {@code damerauLevenshtein}</li>
 *   <li>{@code levenstein} (historical spelling) or {@code levenshtein}</li>
 *   <li>{@code jarowinkler} or {@code jaro_winkler}</li>
 *   <li>{@code ngram}</li>
 * </ul>
 *
 * @param distanceVal the distance name; must not be {@code null}
 * @return the string distance implementation selected by {@code distanceVal}
 * @throws IllegalArgumentException if the name is not one of the accepted values
 */
private static StringDistance resolveDistance(String distanceVal) {
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "dameraulevenshtein".equals(distanceVal)) {
        // The camelCase alias has to be compared in its lower-cased form: the input was
        // normalised above, so a mixed-case literal could never match.
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal) || "levenshtein".equals(distanceVal)) {
        // "levenstein" is kept for backward compatibility; the correct spelling is accepted too.
        return new LevensteinDistance();
    } else if ("jarowinkler".equals(distanceVal) || "jaro_winkler".equals(distanceVal)) {
        // Jaro and Winkler are two people, so the underscore form mirrors damerau_levenshtein.
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}
项目:CoreferenceResolution    文件:EntityLabelSimMatrixCreator.java   
/**
 * Builds a square, symmetric matrix of pairwise label scores for every entity in the
 * given documents.
 *
 * <p>Entities are numbered in document order, then in entity order within each document.
 * Each diagonal cell is set to 1; each off-diagonal cell holds the value returned by
 * {@code NGramDistance.getDistance} (n-gram size 3) for the two labels.
 *
 * @param documents the documents whose entity labels are compared
 * @return a matrix with one row and one column per entity
 */
public static Matrix getEntityLabelSimMatrix(TokenizedDocument[] documents) {
    // First pass: count the entities so the label array can be sized up front.
    int total = 0;
    for (TokenizedDocument doc : documents) {
        total += doc.entities.length;
    }

    // Second pass: flatten all labels into a single array, preserving document order.
    String[] labels = new String[total];
    int next = 0;
    for (TokenizedDocument doc : documents) {
        for (int e = 0; e < doc.entities.length; ++e) {
            labels[next++] = doc.entities[e].label;
        }
    }

    Matrix stringSimMatrix = new Basic2DMatrix(total, total);
    NGramDistance trigram = new NGramDistance(3);
    for (int row = 0; row < labels.length; ++row) {
        stringSimMatrix.set(row, row, 1);
        // Only the upper triangle is computed; each score is mirrored into the lower one.
        for (int col = row + 1; col < labels.length; ++col) {
            double score = trigram.getDistance(labels[row], labels[col]);
            stringSimMatrix.set(row, col, score);
            stringSimMatrix.set(col, row, score);
        }
    }
    return stringSimMatrix;
}
项目:FOX    文件:CandidateUtil.java   
/**
 * Creates a candidate utility configured from a properties resource on the classpath.
 *
 * <p>The resource is looked up through the current thread's context class loader. The
 * same {@code file} name is also passed to the {@code TripleIndex} constructor.
 *
 * @param file name of the properties resource to load
 * @throws IOException if the resource cannot be found or cannot be read
 */
public CandidateUtil(final String file) throws IOException {
  final ClassLoader loader = Thread.currentThread().getContextClassLoader();
  final Properties prop = new Properties();

  // try-with-resources closes the stream, which was previously leaked.
  try (InputStream is = loader.getResourceAsStream(file)) {
    if (is == null) {
      // getResourceAsStream signals "not found" with null; fail with a clear message
      // instead of a NullPointerException from Properties.load.
      throw new IOException("Configuration resource not found on classpath: " + file);
    }
    prop.load(is);
  }

  nodeType = prop.getProperty("nodeType");
  nGramDistance = new NGramDistance(Integer.parseInt(prop.getProperty("ngramDistance")));
  index = new TripleIndex(file);
  context = Boolean.parseBoolean(prop.getProperty("context"));
  if (context) { // in case the index by context exist
    index2 = new TripleIndexContext();
  }
  corporationAffixCleaner = new CorporationAffixCleaner();
  domainWhiteLister = new DomainWhiteLister(index);
  popularity = Boolean.parseBoolean(prop.getProperty("popularity"));
  acronym = Boolean.parseBoolean(prop.getProperty("acronym"));
  commonEntities = Boolean.parseBoolean(prop.getProperty("commonEntities"));
  algorithm = prop.getProperty("algorithm");
}
项目:AGDISTIS    文件:CandidateUtil.java   
public CandidateUtil() throws IOException {
    Properties prop = new Properties();
    InputStream input = CandidateUtil.class.getResourceAsStream("/config/agdistis.properties");
    prop.load(input);

    String envNodeType = System.getenv("AGDISTIS_NODE_TYPE");
    this.nodeType = envNodeType != null ? envNodeType : prop.getProperty("nodeType");
    String envNgramDistance = System.getenv("AGDISTIS_NGRAM_DISTANCE");
    this.nGramDistance = new NGramDistance(
            Integer.valueOf(envNgramDistance != null ? envNgramDistance : prop.getProperty("ngramDistance")));
    this.index = new TripleIndex();
    String envContext = System.getenv("AGDISTIS_CONTEXT");
    this.context = Boolean.valueOf(envContext != null ? envContext : prop.getProperty("context"));
    if (context == true) { // in case the index by context exist
        this.index2 = new TripleIndexContext();
    }
    this.corporationAffixCleaner = new CorporationAffixCleaner();
    this.domainWhiteLister = new DomainWhiteLister(index);
    String envPopularity = System.getenv("AGDISTIS_POPULARITY");
    this.popularity = Boolean.valueOf(envPopularity != null ? envPopularity : prop.getProperty("popularity"));
    String envAcronym = System.getenv("AGDISTIS_ACRONYM");
    this.acronym = Boolean.valueOf(envAcronym != null ? envAcronym : prop.getProperty("acronym"));
    String envCommonEntities = System.getenv("AGDISTIS_COMMON_ENTITIES");
    this.commonEntities = Boolean
            .valueOf(envCommonEntities != null ? envCommonEntities : prop.getProperty("commonEntities"));
    String envAlgorithm = System.getenv("AGDISTIS_ALGORITHM");
    this.algorithm = envAlgorithm != null ? envAlgorithm : prop.getProperty("algorithm");
}
项目:AGDISTIS    文件:TripleIndexTest.java   
/**
 * Checks that the trigram distance between the rdfs:label of the Barack Obama resource
 * and each of its skos:altLabel surface forms is non-negative.
 */
@Test
public void testSurfaceFormsDistance() {
    final String resource = "http://dbpedia.org/resource/Barack_Obama";
    final List<Triple> label = index.search(resource, "http://www.w3.org/2000/01/rdf-schema#label", null);
    final List<Triple> surfaceForms = index.search(resource, "http://www.w3.org/2004/02/skos/core#altLabel", null);
    log.debug(" * " + surfaceForms.size());

    final NGramDistance trigram = new NGramDistance(3);
    for (final Triple surfaceForm : surfaceForms) {
        // The primary label is read inside the loop, so an empty surface-form list
        // never touches the label list.
        final String primary = label.get(0).getObject();
        final String alternative = surfaceForm.getObject();
        final float distance = trigram.getDistance(primary, alternative);

        log.debug(primary + " " + alternative + " : " + distance);
        assertTrue(distance >= 0);
    }
}
项目:elasticsearch_my    文件:TermSuggestionBuilder.java   
/**
 * Returns the Lucene string distance backing this option.
 *
 * @return a new {@link NGramDistance} built with its no-argument constructor
 */
@Override
public StringDistance toLucene() {
    final StringDistance ngram = new NGramDistance();
    return ngram;
}