Java 类org.apache.lucene.search.spell.LevensteinDistance 实例源码

项目:elasticsearch_my    文件:DirectCandidateGeneratorBuilder.java   
/**
 * Maps a distance option name onto the corresponding {@link StringDistance} implementation.
 * <p>
 * The name is lower-cased with {@link Locale#US} before it is compared, so matching is
 * case-insensitive. Every literal compared against must therefore be all lower-case; the
 * camel-case alias of {@code damerau_levenshtein} is matched as {@code dameraulevenshtein}.
 *
 * @param distanceVal the option name; one of {@code internal}, {@code damerau_levenshtein}
 *                    (or its camel-case alias {@code damerauLevenshtein}), {@code levenstein},
 *                    {@code jarowinkler} or {@code ngram}, in any letter case
 * @return the string distance selected by {@code distanceVal}
 * @throws IllegalArgumentException if {@code distanceVal} is none of the supported names
 */
private static StringDistance resolveDistance(String distanceVal) {
    distanceVal = distanceVal.toLowerCase(Locale.US);
    if ("internal".equals(distanceVal)) {
        return DirectSpellChecker.INTERNAL_LEVENSHTEIN;
    } else if ("damerau_levenshtein".equals(distanceVal) || "dameraulevenshtein".equals(distanceVal)) {
        // the alias literal must be lower-case: the value was lower-cased above, so a
        // mixed-case literal could never be equal to it
        return new LuceneLevenshteinDistance();
    } else if ("levenstein".equals(distanceVal)) {
        return new LevensteinDistance();
        // TODO Jaro and Winkler are 2 people - so apply same naming logic
        // as damerau_levenshtein
    } else if ("jarowinkler".equals(distanceVal)) {
        return new JaroWinklerDistance();
    } else if ("ngram".equals(distanceVal)) {
        return new NGramDistance();
    } else {
        throw new IllegalArgumentException("Illegal distance option " + distanceVal);
    }
}
项目:elasticsearch_my    文件:AbstractScopedSettings.java   
/**
 * Validates the setting identified by {@code key} against the given settings.
 * <p>
 * When {@link #get(String)} finds no setting for {@code key}, an
 * {@link IllegalArgumentException} is thrown. Its message suggests every key of
 * {@code keySettings} whose {@link LevensteinDistance#getDistance} score against {@code key}
 * is above 0.7, highest score first, or a generic hint when no key scores that high.
 * Otherwise the setting's value is read from {@code settings}.
 *
 * @param key      the key of the setting to validate
 * @param settings the settings the value is read from
 * @throws IllegalArgumentException if no setting is found for {@code key}
 */
public final void validate(String key, Settings settings) {
    Setting<?> setting = get(key);
    if (setting == null) {
        LevensteinDistance ld = new LevensteinDistance();
        List<Tuple<Float, String>> scoredKeys = new ArrayList<>();
        for (String k : this.keySettings.keySet()) {
            float distance = ld.getDistance(key, k);
            if (distance > 0.7f) {
                scoredKeys.add(new Tuple<>(distance, k));
            }
        }
        // highest score first
        CollectionUtil.timSort(scoredKeys, (a, b) -> b.v1().compareTo(a.v1()));
        String msgPrefix = "unknown setting";
        SecureSettings secureSettings = settings.getSecureSettings();
        if (secureSettings != null && secureSettings.getSettingNames().contains(key)) {
            msgPrefix = "unknown secure setting";
        }
        String msg = msgPrefix + " [" + key + "]";
        List<String> keys = scoredKeys.stream().map(Tuple::v2).collect(Collectors.toList());
        if (keys.isEmpty() == false) {
            msg += " did you mean " + (keys.size() == 1 ? "[" + keys.get(0) + "]": "any of " + keys.toString()) + "?";
        } else {
            msg += " please check that any required plugins are installed, or check the breaking changes documentation for removed " +
                "settings";
        }
        throw new IllegalArgumentException(msg);
    }
    setting.get(settings);
}
项目:semtool    文件:CheckConsistencyPanel.java   
/**
 * Builds the panel: initialises the components, installs the list renderers and fills the
 * algorithm selector with the available string-distance implementations.
 */
public CheckConsistencyPanel() {
    initComponents();
    conceptList.setCellRenderer( crenderer );
    relationList.setCellRenderer( rrenderer );

    LabeledPairRenderer<StringDistance> algorithmRenderer = new LabeledPairRenderer<>();
    algorithm.setRenderer( algorithmRenderer );

    // insertion-ordered map, so the selector lists the algorithms in the order given here
    Map<StringDistance, String> labelsByDistance = new LinkedHashMap<>();
    labelsByDistance.put( new LevensteinDistance(), "Levenstein" );
    labelsByDistance.put( new DoubleMetaphoneDistance(), "Double Metaphone" );
    labelsByDistance.put( new MetaphoneDistance(), "Metaphone" );
    labelsByDistance.put( new SoundexDistance(), "Soundex" );
    algorithmRenderer.cache( labelsByDistance );

    labelsByDistance.keySet().forEach( distance -> algorithm.addItem( distance ) );
}
项目:t4f-data    文件:SpellCheckerExample.java   
/**
 * Prints up to five spelling suggestions for a word, using an existing spell-checker index.
 * <p>
 * Exits with status 1 when the argument count is wrong or the directory holds no index.
 * The index directory and the spell checker are closed before the method returns normally
 * or with an exception.
 *
 * @param args {@code args[0]} is the spell-checker index directory, {@code args[1]} the word
 *             to respell
 * @throws IOException propagated from opening, querying or closing the index
 */
public static void main(String[] args) throws IOException {

        if (args.length != 2) {
            LOGGER.info("Usage: java lia.tools.SpellCheckerTest SpellCheckerIndexDir wordToRespell");
            System.exit(1);
        }

        String spellCheckDir = args[0];
        String wordToRespell = args[1];

        Directory dir = FSDirectory.open(new File(spellCheckDir));
        try {
            if (!IndexReader.indexExists(dir)) {
                LOGGER.info("\nERROR: No spellchecker index at path \"" + spellCheckDir
                        + "\"; please run CreateSpellCheckerIndex first\n");
                System.exit(1);
            }
            SpellChecker spell = new SpellChecker(dir); // #A
            try {
                spell.setStringDistance(new LevensteinDistance()); // #B
                // alternative metric: spell.setStringDistance(new JaroWinklerDistance());

                String[] suggestions = spell.suggestSimilar(wordToRespell, 5); // #C
                LOGGER.info(suggestions.length + " suggestions for '" + wordToRespell + "':");
                for (String suggestion : suggestions) {
                    LOGGER.info("  " + suggestion);
                }
            } finally {
                // release the spell checker even if the lookup fails
                spell.close();
            }
        } finally {
            // the directory was opened here, so it is closed here
            dir.close();
        }
    }
项目:elasticsearch_my    文件:BaseRestHandler.java   
/**
 * Builds the error message for a request that contains unrecognized values.
 * <p>
 * Each invalid value is listed in brackets, separated by commas. A value is followed by a
 * "did you mean" hint naming every candidate whose {@link LevensteinDistance#getDistance}
 * score against it is greater than 0.5.
 *
 * @param request    the request; its path is included in the message
 * @param invalids   the unrecognized values
 * @param candidates the values offered as suggestions
 * @param detail     the noun describing what was unrecognized; an "s" is appended when
 *                   there is more than one invalid value
 * @return the formatted message
 */
protected final String unrecognized(
    final RestRequest request,
    final Set<String> invalids,
    final Set<String> candidates,
    final String detail) {
    final StringBuilder message = new StringBuilder(String.format(
        Locale.ROOT,
        "request [%s] contains unrecognized %s%s: ",
        request.path(),
        detail,
        invalids.size() > 1 ? "s" : ""));
    // empty before the first entry, ", " before every later one
    String separator = "";
    for (final String invalid : invalids) {
        final LevensteinDistance levenstein = new LevensteinDistance();
        final List<Tuple<Float, String>> suggestions = new ArrayList<>();
        for (final String candidate : candidates) {
            final float score = levenstein.getDistance(invalid, candidate);
            if (score > 0.5f) {
                suggestions.add(new Tuple<>(score, candidate));
            }
        }
        // highest score first; equal scores are ordered by parameter name
        CollectionUtil.timSort(suggestions, (left, right) -> {
            final int byScoreDescending = right.v1().compareTo(left.v1());
            return byScoreDescending != 0 ? byScoreDescending : left.v2().compareTo(right.v2());
        });
        message.append(separator).append("[").append(invalid).append("]");
        final List<String> names = suggestions.stream().map(Tuple::v2).collect(Collectors.toList());
        if (names.isEmpty() == false) {
            message.append(" -> did you mean ");
            if (names.size() == 1) {
                message.append("[").append(names.get(0)).append("]");
            } else {
                message.append("any of ").append(names.toString());
            }
            message.append("?");
        }
        separator = ", ";
    }

    return message.toString();
}
项目:elasticsearch_my    文件:InstallPluginCommand.java   
/**
 * Returns the official plugin names that look similar to {@code pluginId}: those whose
 * {@link LevensteinDistance#getDistance} score against it is above 0.7, highest score first.
 */
private List<String> checkMisspelledPlugin(String pluginId) {
    final LevensteinDistance levenstein = new LevensteinDistance();
    final List<Tuple<Float, String>> scored = new ArrayList<>();
    for (final String official : OFFICIAL_PLUGINS) {
        final float score = levenstein.getDistance(pluginId, official);
        if (score > 0.7f) {
            scored.add(new Tuple<>(score, official));
        }
    }
    // descending by score
    CollectionUtil.timSort(scored, (left, right) -> right.v1().compareTo(left.v1()));
    return scored.stream().map(Tuple::v2).collect(Collectors.toList());
}
项目:brigen-base    文件:LuceneDelegaterImpl.java   
/**
 * Looks up the Lucene string-distance classes by name, one after another.
 *
 * @throws RuntimeException wrapping the {@link ClassNotFoundException} if a lookup fails
 */
private static void check() {
    try {
        final String jaroWinklerName = JaroWinklerDistance.class.getName();
        Class.forName(jaroWinklerName);

        final String levensteinName = LevensteinDistance.class.getName();
        Class.forName(levensteinName);

        final String stringDistanceName = StringDistance.class.getName();
        Class.forName(stringDistanceName);
    } catch (ClassNotFoundException notFound) {
        throw new RuntimeException(notFound);
    }
}
项目:elasticsearch_my    文件:TermSuggestionBuilder.java   
/** Returns a newly created {@link LevensteinDistance} as the Lucene string distance. */
@Override
public StringDistance toLucene() {
    final LevensteinDistance levenstein = new LevensteinDistance();
    return levenstein;
}
项目:semtool    文件:EngineConsistencyCheckerTest.java   
/** Creates the checker under test, backed by a new {@link LevensteinDistance}. */
@Before
public void setUp() {
    final LevensteinDistance levenstein = new LevensteinDistance();
    ecc = new EngineConsistencyChecker( engine, false, levenstein );
}
项目:related-searches    文件:TimeClickAndDistanceEvaluation.java   
/**
 * Entry point: runs a string-distance evaluator and a time-based evaluator over the same
 * query log and writes their combined result.
 * <p>
 * Expects 11 or 12 arguments, in this order: redis host, redis port, time between queries,
 * suggestion threshold, similarity threshold, more-similar flag, distance boost, time-based
 * boost, non-zero-hits-only flag, query log path, output file, and optionally an index name.
 * Any other argument count prints the usage text and exits with status -1.
 * <p>
 * The Elasticsearch writer is used when the system property named by
 * {@code Settings.ES_OUTPUT_ENABLED} is {@code "true"}; otherwise a file writer is used.
 *
 * @param args
 *          command-line arguments as described above
 * @throws IOException
 *           propagated from the components this method invokes
 */
public static void main(String[] args) throws IOException {
  if (args.length != 11 && args.length != 12) {
    System.err.println("Usage:");
    System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
        + "<suggestion_threshold> <similarity_threshold> <more_similar> <distance_boost> "
        + "<timebased_boost> <non_zero_hits_only> <query_log> <output_file>");
    System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
        + "<suggestion_threshold> <similarity_threshold> <more_similar> <distance_boost> "
        + "<timebased_boost> <non_zero_hits_only> <query_log> <es_host> <es_index>");
    System.exit(-1);
  }

  String redisHost = args[0];
  int redisPort = Integer.parseInt(args[1]);
  int timeBetweenQueries = Integer.parseInt(args[2]);
  double suggestionThreshold = Double.parseDouble(args[3]);
  float similarityThreshold = Float.parseFloat(args[4]);
  boolean moreSimilar = Boolean.parseBoolean(args[5]);
  float distanceBoost = Float.parseFloat(args[6]);
  float timebasedBoost = Float.parseFloat(args[7]);
  boolean nonZeroHitsOnly = Boolean.parseBoolean(args[8]);
  String queryLogPath = args[9];
  // in the 12-argument form this position carries <es_host>
  String outputFile = args[10];
  String esIndex = null;
  if (args.length == 12) {
    esIndex = args[11];
  }

  List<Float> boosts = new ArrayList<Float>();
  boosts.add(distanceBoost);
  boosts.add(timebasedBoost);

  List<String> prefixes = new ArrayList<String>();
  prefixes.add(Settings.STRING_DISTANCE_PREFIX);
  prefixes.add(Settings.TIME_CLICK_PREFIX);

  LookBackTrigger lookBackTrigger = null;

  SegmentProcessorQueriesEvaluator stringDistanceApproach = new SegmentProcessorQueriesEvaluator(redisHost, redisPort,
      queryLogPath, new TimeBasedLookBackStrategy(60 * 1000), lookBackTrigger,
      new SimilarityCond(new LevensteinDistance(), similarityThreshold, moreSimilar), suggestionThreshold,
      Settings.STRING_DISTANCE_PREFIX, nonZeroHitsOnly);

  SegmentProcessorQueriesEvaluator timeBasedApproach = new SegmentProcessorQueriesEvaluator(redisHost, redisPort,
      queryLogPath, new TimeBasedLookBackStrategy(timeBetweenQueries * 1000), lookBackTrigger,
      new SimilarityCond(new LevensteinDistance(), 0.1f, true), suggestionThreshold, Settings.TIME_CLICK_PREFIX,
      nonZeroHitsOnly);

  List<SegmentProcessorQueriesEvaluator> approachesList = new ArrayList<SegmentProcessorQueriesEvaluator>();
  approachesList.add(stringDistanceApproach);
  approachesList.add(timeBasedApproach);

  OutputWriter writer;
  // true only when the system property is set and equals "true", ignoring case
  if (Boolean.getBoolean(Settings.ES_OUTPUT_ENABLED)) {
    // NOTE(review): esIndex is null here when only 11 arguments were given - confirm the writer accepts that
    writer = new CombinedQueriesEvaluatorESWriter(outputFile, esIndex, boosts, prefixes, 2, nonZeroHitsOnly);
  } else {
    writer = new CombinedQueriesEvaluatorFileWriter(outputFile, boosts, prefixes, 2, nonZeroHitsOnly);
  }

  try {
    writer.write(approachesList);
  } finally {
    // close the writer even when writing fails
    writer.close();
  }
}
项目:related-searches    文件:BasicRelatedSearchesEvaluator.java   
/**
 * Entry point: builds one evaluator per trigger/strategy/condition combination and hands
 * them to an output writer.
 * <p>
 * Expects 7 to 9 arguments, in this order: redis host, redis port, time between queries,
 * suggestion threshold, similarity threshold, more-similar flag and query log path. An eighth
 * argument alone is an output file; an eighth and ninth together are an Elasticsearch address
 * and index name. Any other argument count prints the usage text and exits with status -1.
 *
 * @param args
 *          command-line arguments as described above
 * @throws IOException
 *           propagated from the components this method invokes
 */
public static void main(String[] args) throws IOException {
  if (args.length < 7 || args.length > 9) {
    System.err.println("Usage:");
    System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
        + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log>");
    System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
        + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log> <output_file>");
    System.err.println("java -jar RelatedQueries.jar <redis_host> <redis_port> <time_between_queries> "
        + "<suggestion_threshold> <similarity_threshold> <more_similar> <query_log> <elasticsearch_address> <index_name>");
    System.exit(-1);
  }

  String redisHost = args[0];
  int redisPort = Integer.parseInt(args[1]);
  int timeBetweenQueries = Integer.parseInt(args[2]);
  double suggestionThreshold = Double.parseDouble(args[3]);
  float similarityThreshold = Float.parseFloat(args[4]);
  boolean moreSimilar = Boolean.parseBoolean(args[5]);
  String queryLogPath = args[6];
  String outputFile = null;
  String host = null;
  String index = null;
  if (args.length == 8) {
    outputFile = args[7];
  } else if (args.length == 9) {
    host = args[7];
    index = args[8];
  }

  List<SegmentProcessorQueriesEvaluator> sys = new LinkedList<SegmentProcessorQueriesEvaluator>();

  EntryAcceptCond[] conds = { new SimilarityCond(new LevensteinDistance(), similarityThreshold, moreSimilar) };

  // FIXME: extract to configuration
  LookBackStrategy[] strategies = { new TimeBasedLookBackStrategy(timeBetweenQueries * 1000) };

  // FIXME: extract to configuration
  LookBackTrigger[] triggers = { null };

  // one evaluator per (trigger, strategy, condition) combination
  for (LookBackTrigger trigger : triggers) {
    for (LookBackStrategy strategy : strategies) {
      for (EntryAcceptCond cond : conds) {
        sys.add(new SegmentProcessorQueriesEvaluator(redisHost, redisPort, queryLogPath, strategy, trigger,
            cond, suggestionThreshold, Settings.REDIS_PREFIX, false));
      }
    }
  }

  OutputWriter outputWriter;
  if (outputFile != null) {
    outputWriter = new SingleFileOutputWriter(outputFile, Settings.REDIS_PREFIX);
  } else if (host != null && index != null) {
    outputWriter = new ElasticsearchHTTPOutputWriter(host, index, Settings.REDIS_PREFIX);
  } else {
    outputWriter = new PerEvaluationFileOutputWriter(Settings.REDIS_PREFIX);
  }
  try {
    outputWriter.write(sys);
  } finally {
    // close the writer even when writing fails
    outputWriter.close();
  }
}