Java 类org.apache.lucene.search.spell.SuggestMode 实例源码

项目:elasticsearch_my    文件:DirectCandidateGenerator.java   
/**
 * Generates spelling-correction candidates for terms of {@code field} using the
 * given {@link DirectSpellChecker} over {@code reader}.
 *
 * @param terms the field's terms; must not be null
 * @throws IllegalArgumentException if {@code terms} is null (field absent from the index)
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    // Prefer the total term frequency as the dictionary size; -1 means the codec
    // does not store it, so fall back to the reader's document count.
    final long dictSize = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = dictSize != -1;
    this.dictSize =  dictSize == -1 ? reader.maxDoc() : dictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    // Thresholds >= 1 are taken as absolute frequencies; fractional thresholds
    // are scaled by the dictionary size.
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
项目:elasticsearch_my    文件:DirectCandidateGenerator.java   
/**
 * Fills the given candidate set with correction candidates for its original
 * term: pre-filters the term, queries the spell checker, then post-filters
 * and scores every suggested word before adding it to the set.
 *
 * @return the same {@code set}, with candidates added
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate sourceTerm = set.originalTerm;
    final BytesRef filteredTerm = preFilter(sourceTerm.term, spare, byteSpare);
    // SUGGEST_ALWAYS disables the frequency threshold entirely; otherwise it
    // is derived from the original term's frequency and the dictionary size.
    spellchecker.setThresholdFrequency(
            this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(sourceTerm.frequency, dictSize));
    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);
    final List<Candidate> collected = new ArrayList<>(similarWords.length);
    for (SuggestWord word : similarWords) {
        final BytesRef candidateTerm = new BytesRef(word.string);
        postFilter(new Candidate(candidateTerm, internalFrequency(candidateTerm), word.score,
                score(word.freq, word.score, dictSize), false), spare, byteSpare, collected);
    }
    set.addCandidates(collected);
    return set;
}
项目:Elasticsearch    文件:DirectCandidateGenerator.java   
/**
 * Generates spelling-correction candidates for terms of {@code field} using the
 * given {@link DirectSpellChecker} over {@code reader}.
 *
 * @param terms the field's terms; must not be null
 * @throws IllegalArgumentException if {@code terms} is null (field absent from the index)
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood,  int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    // Prefer the total term frequency as the dictionary size; -1 means the codec
    // does not store it, so fall back to the reader's document count.
    final long dictSize = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = dictSize != -1;
    this.dictSize =  dictSize == -1 ? reader.maxDoc() : dictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    // Thresholds >= 1 are taken as absolute frequencies; fractional thresholds
    // are scaled by the dictionary size.
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
项目:Elasticsearch    文件:DirectCandidateGenerator.java   
/**
 * Fills the given candidate set with correction candidates for its original
 * term: pre-filters the term, queries the spell checker, then post-filters
 * and scores every suggested word before adding it to the set.
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    Candidate original = set.originalTerm;
    BytesRef term = preFilter(original.term, spare, byteSpare);
    final long frequency = original.frequency;
    // SUGGEST_ALWAYS disables the frequency threshold entirely.
    spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
    SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
    List<Candidate> candidates = new ArrayList<>(suggestSimilar.length);
    for (int i = 0; i < suggestSimilar.length; i++) {
        SuggestWord suggestWord = suggestSimilar[i];
        BytesRef candidate = new BytesRef(suggestWord.string);
        postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates);
    }
    set.addCandidates(candidates);
    return set;
}
项目:SolrPlugins    文件:DiceMultipleCaseSuggester.java   
/**
 * Runs a suggester lookup for a single token.
 *
 * @return the raw lookup results, or {@code null} when the lookup produced nothing
 */
private List<LookupResult> getLookupResults(SpellingOptions options, Token currentToken) throws IOException {
    CharsRef tokenRef = new CharsRef();
    tokenRef.chars = currentToken.buffer();
    tokenRef.offset = 0;
    tokenRef.length = currentToken.length();
    // The "more popular" flag is only forwarded for lookup implementations other
    // than WFST/analyzing suggesters.
    boolean restrictToMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR)
            && !(lookup instanceof WFSTCompletionLookup)
            && !(lookup instanceof AnalyzingSuggester);

    List<LookupResult> suggestions = lookup.lookup(tokenRef, restrictToMorePopular, options.count);
    return (suggestions == null || suggestions.isEmpty()) ? null : suggestions;
}
项目:elasticsearch_my    文件:DirectCandidateGeneratorBuilder.java   
/**
 * Maps a REST-level suggest-mode name ("missing" | "popular" | "always",
 * case-insensitive) onto the corresponding {@link SuggestMode}.
 *
 * @throws IllegalArgumentException if the name is not one of the three known modes
 */
private static SuggestMode resolveSuggestMode(String suggestMode) {
    // Lower-case first so the exception message also reports the normalized form.
    suggestMode = suggestMode.toLowerCase(Locale.US);
    switch (suggestMode) {
        case "missing":
            return SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
        case "popular":
            return SuggestMode.SUGGEST_MORE_POPULAR;
        case "always":
            return SuggestMode.SUGGEST_ALWAYS;
        default:
            throw new IllegalArgumentException("Illegal suggest mode " + suggestMode);
    }
}
项目:preDict    文件:LuceneWordSearch.java   
/**
 * Looks up at most two spell-check suggestions for the query term and returns
 * their surface strings.
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
    final List<String> corrections = new ArrayList<>();
    // SUGGEST_ALWAYS: produce candidates even when the query term itself exists in the index.
    for (SuggestWord word : spellChecker.suggestSimilar(new Term(WORD_FIELD, searchQuery), 2, reader, SuggestMode.SUGGEST_ALWAYS)) {
        corrections.add(word.string);
    }
    return corrections;
}
项目:search    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs.
 *
 * @param tokens          tokens to generate suggestions for
 * @param reader          index reader backing the spell check
 * @param count           maximum number of suggestions per token
 * @param suggestMode     controls when a suggestion is generated for a term
 * @param extendedResults whether extended result details were requested
 * @param accuracy        minimum accuracy a suggestion must reach
 * @param customParams    extra, checker-specific parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:search    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs, including an
 * alternative-term count.
 *
 * @param alternativeTermCount suggestion count applied to terms already present
 *        in the index (stored as-is on the options)
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, int alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:search    文件:Suggester.java   
/**
 * Returns lookup-based suggestions for each token in {@code options.tokens}.
 * Results are sorted unless the mode is SUGGEST_MORE_POPULAR, in which case
 * the lookup's own ordering is kept. Returns {@code EMPTY_RESULT} when the
 * lookup structure has not been built yet.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }
  SpellingResult result = new SpellingResult();
  CharsRef tokenChars = new CharsRef();
  for (Token token : options.tokens) {
    // Reuse a single CharsRef as a view over each token's buffer.
    tokenChars.chars = token.buffer();
    tokenChars.offset = 0;
    tokenChars.length = token.length();
    // The popularity flag is only forwarded for lookups other than the
    // WFST/analyzing suggesters.
    boolean onlyMorePopular = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR
        && !(lookup instanceof WFSTCompletionLookup)
        && !(lookup instanceof AnalyzingSuggester);
    List<LookupResult> found = lookup.lookup(tokenChars, onlyMorePopular, options.count);
    if (found == null) {
      continue;
    }
    if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
      Collections.sort(found);
    }
    for (LookupResult hit : found) {
      result.add(token, hit.key.toString(), (int) hit.value);
    }
  }
  return result;
}
项目:NYBC    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs.
 *
 * @param tokens          tokens to generate suggestions for
 * @param reader          index reader backing the spell check
 * @param count           maximum number of suggestions per token
 * @param suggestMode     controls when a suggestion is generated for a term
 * @param extendedResults whether extended result details were requested
 * @param accuracy        minimum accuracy a suggestion must reach
 * @param customParams    extra, checker-specific parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:NYBC    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs, including an
 * alternative-term count.
 *
 * @param alternativeTermCount suggestion count applied to terms already present
 *        in the index (stored as-is on the options; may be null)
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, Integer alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:NYBC    文件:Suggester.java   
/**
 * Returns lookup-based suggestions for each token in {@code options.tokens}.
 * Returns {@code EMPTY_RESULT} when the lookup structure has not been built yet.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }
  SpellingResult res = new SpellingResult();
  CharsRef scratch = new CharsRef();
  for (Token t : options.tokens) {
    // Reuse one CharsRef as a view over the token's buffer to avoid copying.
    scratch.chars = t.buffer();
    scratch.offset = 0;
    scratch.length = t.length();
    // The popularity flag is only forwarded for lookups other than the
    // WFST/analyzing suggesters.
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
      !(lookup instanceof WFSTCompletionLookup) &&
      !(lookup instanceof AnalyzingSuggester);
    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null) {
      continue;
    }
    if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
      // Keep the lookup's own order only when popularity ordering was requested.
      Collections.sort(suggestions);
    }
    for (LookupResult lr : suggestions) {
      res.add(t, lr.key.toString(), (int)lr.value);
    }
  }
  return res;
}
项目:search-core    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs.
 *
 * @param tokens          tokens to generate suggestions for
 * @param reader          index reader backing the spell check
 * @param count           maximum number of suggestions per token
 * @param suggestMode     controls when a suggestion is generated for a term
 * @param extendedResults whether extended result details were requested
 * @param accuracy        minimum accuracy a suggestion must reach
 * @param customParams    extra, checker-specific parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:search-core    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs, including an
 * alternative-term count.
 *
 * @param alternativeTermCount suggestion count applied to terms already present
 *        in the index (stored as-is on the options; may be null)
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, Integer alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:search-core    文件:Suggester.java   
/**
 * Returns lookup-based suggestions for each token in {@code options.tokens}.
 * Returns {@code EMPTY_RESULT} when the lookup structure has not been built yet.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }
  SpellingResult res = new SpellingResult();
  CharsRef scratch = new CharsRef();
  for (Token t : options.tokens) {
    // Reuse one CharsRef as a view over the token's buffer to avoid copying.
    scratch.chars = t.buffer();
    scratch.offset = 0;
    scratch.length = t.length();
    // The popularity flag is only forwarded for lookups other than the
    // WFST/analyzing suggesters.
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
      !(lookup instanceof WFSTCompletionLookup) &&
      !(lookup instanceof AnalyzingSuggester);
    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null) {
      continue;
    }
    if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
      // Keep the lookup's own order only when popularity ordering was requested.
      Collections.sort(suggestions);
    }
    for (LookupResult lr : suggestions) {
      res.add(t, lr.key.toString(), (int)lr.value);
    }
  }
  return res;
}
项目:read-open-source-code    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs.
 *
 * @param tokens          tokens to generate suggestions for
 * @param reader          index reader backing the spell check
 * @param count           maximum number of suggestions per token
 * @param suggestMode     controls when a suggestion is generated for a term
 * @param extendedResults whether extended result details were requested
 * @param accuracy        minimum accuracy a suggestion must reach
 * @param customParams    extra, checker-specific parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:read-open-source-code    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs, including an
 * alternative-term count.
 *
 * @param alternativeTermCount suggestion count applied to terms already present
 *        in the index (stored as-is on the options; may be null)
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, Integer alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:read-open-source-code    文件:Suggester.java   
/**
 * Returns lookup-based suggestions for each token in {@code options.tokens}.
 * Returns {@code EMPTY_RESULT} when the lookup structure has not been built yet.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }
  SpellingResult res = new SpellingResult();
  CharsRef scratch = new CharsRef();
  for (Token t : options.tokens) {
    // Reuse one CharsRef as a view over the token's buffer to avoid copying.
    scratch.chars = t.buffer();
    scratch.offset = 0;
    scratch.length = t.length();
    // The popularity flag is only forwarded for lookups other than the
    // WFST/analyzing suggesters.
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
      !(lookup instanceof WFSTCompletionLookup) &&
      !(lookup instanceof AnalyzingSuggester);
    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null) {
      continue;
    }
    if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
      // Keep the lookup's own order only when popularity ordering was requested.
      Collections.sort(suggestions);
    }
    for (LookupResult lr : suggestions) {
      res.add(t, lr.key.toString(), (int)lr.value);
    }
  }
  return res;
}
项目:read-open-source-code    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs.
 *
 * @param tokens          tokens to generate suggestions for
 * @param reader          index reader backing the spell check
 * @param count           maximum number of suggestions per token
 * @param suggestMode     controls when a suggestion is generated for a term
 * @param extendedResults whether extended result details were requested
 * @param accuracy        minimum accuracy a suggestion must reach
 * @param customParams    extra, checker-specific parameters
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:read-open-source-code    文件:SpellingOptions.java   
/**
 * Bundles the inputs one {@code getSuggestions} call needs, including an
 * alternative-term count.
 *
 * @param alternativeTermCount suggestion count applied to terms already present
 *        in the index (stored as-is on the options)
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, int alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  this.tokens = tokens;
  this.reader = reader;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.suggestMode = suggestMode;
  this.extendedResults = extendedResults;
  this.accuracy = accuracy;
  this.customParams = customParams;
}
项目:read-open-source-code    文件:Suggester.java   
/**
 * Returns lookup-based suggestions for each token in {@code options.tokens}.
 * Returns {@code EMPTY_RESULT} when the lookup structure has not been built yet.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  LOG.debug("getSuggestions: " + options.tokens);
  if (lookup == null) {
    LOG.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }
  SpellingResult res = new SpellingResult();
  CharsRef scratch = new CharsRef();
  for (Token t : options.tokens) {
    // Reuse one CharsRef as a view over the token's buffer to avoid copying.
    scratch.chars = t.buffer();
    scratch.offset = 0;
    scratch.length = t.length();
    // The popularity flag is only forwarded for lookups other than the
    // WFST/analyzing suggesters.
    boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
      !(lookup instanceof WFSTCompletionLookup) &&
      !(lookup instanceof AnalyzingSuggester);
    List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
    if (suggestions == null) {
      continue;
    }
    if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
      // Keep the lookup's own order only when popularity ordering was requested.
      Collections.sort(suggestions);
    }
    for (LookupResult lr : suggestions) {
      res.add(t, lr.key.toString(), (int)lr.value);
    }
  }
  return res;
}
项目:elasticsearch_my    文件:DirectSpellcheckerSettings.java   
/** Returns the configured suggest mode. */
public SuggestMode suggestMode() {
    return suggestMode;
}
项目:elasticsearch_my    文件:DirectCandidateGenerator.java   
/**
 * Convenience constructor: no pre/post filter analyzers, with the field's
 * terms resolved from the reader via {@code MultiFields.getTerms}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates) throws IOException {
    this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
项目:Elasticsearch    文件:DirectSpellcheckerSettings.java   
/** Returns the configured suggest mode. */
public SuggestMode suggestMode() {
    return suggestMode;
}
项目:Elasticsearch    文件:DirectCandidateGenerator.java   
/**
 * Convenience constructor: no pre/post filter analyzers, with the field's
 * terms resolved from the reader via {@code MultiFields.getTerms}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
    this(spellchecker, field, suggestMode, reader,  nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
项目:search    文件:WordBreakSpellChecker.java   
/**
 * <p>
 * Generate suggestions by breaking the passed-in term into multiple words.
 * The scores returned are equal to the number of word breaks needed so a
 * lower score is generally preferred over a higher score.
 * </p>
 * 
 * @param term the term to break up into multiple words
 * @param maxSuggestions the maximum number of suggestion arrays to return
 * @param ir the reader used to check frequencies of the candidate words
 * @param suggestMode
 *          - default = {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX}
 * @param sortMethod
 *          - default =
 *          {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
 * @return one or more arrays of words formed by breaking up the original term
 * @throws IOException If there is a low-level I/O error.
 */
public SuggestWord[][] suggestWordBreaks(Term term, int maxSuggestions,
    IndexReader ir, SuggestMode suggestMode,
    BreakSuggestionSortMethod sortMethod) throws IOException {
  if (maxSuggestions < 1) {
    return new SuggestWord[0][0];
  }
  // Apply the documented defaults when the caller passes null.
  if (suggestMode == null) {
    suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
  }
  if (sortMethod == null) {
    sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
  }

  // Collect candidate break-ups in a priority queue ordered per sortMethod;
  // initial capacity is capped at 10.
  int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
  Comparator<SuggestWordArrayWrapper> queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY ? new LengthThenMaxFreqComparator()
      : new LengthThenSumFreqComparator();
  Queue<SuggestWordArrayWrapper> suggestions = new PriorityQueue<>(
      queueInitialCapacity, queueComparator);

  // In SUGGEST_WHEN_NOT_IN_INDEX mode, a term that already occurs in the
  // index yields no break suggestions at all.
  int origFreq = ir.docFreq(term);
  if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) {
    return new SuggestWord[0][];
  }

  // SUGGEST_MORE_POPULAR: each suggested word must be at least as frequent
  // as the original term (or frequency 1 if the term is absent).
  int useMinSuggestionFrequency = minSuggestionFrequency;
  if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) {
    useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
  }

  generateBreakUpSuggestions(term, ir, 1, maxSuggestions,
      useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0,
      sortMethod);

  // Drain the queue head-first into the array from the back, so the head
  // element ends up at the last index.
  SuggestWord[][] suggestionArray = new SuggestWord[suggestions.size()][];
  for (int i = suggestions.size() - 1; i >= 0; i--) {
    suggestionArray[i] = suggestions.remove().suggestWords;
  }

  return suggestionArray;
}
项目:search    文件:IndexBasedSpellCheckerTest.java   
/**
 * Builds an index-based spell checker over the test core's "title" field and
 * verifies extended-results behavior: a misspelling yields one suggestion with
 * its frequency, an unknown word yields an empty suggestion map, and a
 * correctly spelled word yields no entry at all.
 */
@Test
public void testExtendedResults() throws Exception {
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = createTempDir();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("documemt");
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("documemt is null and it shouldn't be", suggestions != null);
  assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true);
  assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // a correctly spelled word produces no suggestion entry in SUGGEST_WHEN_NOT_IN_INDEX mode
  spellOpts.tokens = queryConverter.convert("document");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    holder.decref();
  }
}
项目:search    文件:IndexBasedSpellCheckerTest.java   
/**
 * Verifies that the spell checker can be sourced from a standalone index at an
 * alternate LOCATION: builds a separate index of song titles, points the
 * checker at it, and checks suggestion, no-match and already-correct cases.
 */
@Test
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer())
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  // Point the checker at the standalone index via LOCATION.
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("flesh");
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("flesh is null and it shouldn't be", suggestions != null);
  assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
  assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // a correctly spelled word produces no suggestion entry in SUGGEST_WHEN_NOT_IN_INDEX mode
  spellOpts.tokens = queryConverter.convert("Caroline");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    holder.decref();
  }
}
项目:SolrPlugins    文件:DiceSpellCheckComponent.java   
/**
 * Runs spell checking for the request when the component is enabled.
 * Requires {@code spellcheck.q}; tokenizes it with the selected checker's
 * query analyzer, derives a {@link SuggestMode} from the request parameters,
 * fetches suggestions (and optionally collations), and attaches the result
 * to the response under "spellcheck".
 *
 * @throws SolrException BAD_REQUEST if spellcheck.q is missing,
 *         NOT_FOUND if no matching dictionaries exist
 */
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
        return;
    }
    boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
    String q = params.get(SPELLCHECK_Q);
    SolrSpellChecker spellChecker = getSpellChecker(params);
    Collection<Token> tokens = null;

    if (q == null) {
        // enforce useage of the spellcheck.q parameter - i.e. a query we can tokenize with a regular tokenizer and not
        // a solr query for the spell checking. Useage of the SolrQueryConverter is buggy and breaks frequently
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The spellcheck.q parameter is required.");
    } else {
        //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
        tokens = getTokens(q, spellChecker.getQueryAnalyzer());
    }
    if (tokens != null && tokens.isEmpty() == false) {
        if (spellChecker != null) {
            // Gather the per-request spellcheck options.
            int count = params.getInt(SPELLCHECK_COUNT, 1);
            boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
            boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
            boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
            float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
            Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
            Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
            ModifiableSolrParams customParams = new ModifiableSolrParams();
            for (String checkerName : getDictionaryNames(params)) {
                customParams.add(getCustomParams(checkerName, params));
            }

            // Hit count: prefer the logged value, fall back to the response builder.
            Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
            long hits = 0;
            if (hitsInteger == null) {
                hits = rb.getNumberDocumentsFound();
            } else {
                hits = hitsInteger.longValue();
            }
            SpellingResult spellingResult = null;
            if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
                // Map request flags onto a SuggestMode; onlyMorePopular wins
                // over alternativeTermCount.
                SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
                if (onlyMorePopular) {
                    suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
                } else if (alternativeTermCount != null) {
                    suggestMode = SuggestMode.SUGGEST_ALWAYS;
                }

                IndexReader reader = rb.req.getSearcher().getIndexReader();
                SpellingOptions options = new SpellingOptions(tokens, reader, count,
                        alternativeTermCount, suggestMode, extendedResults, accuracy,
                        customParams);
                spellingResult = spellChecker.getSuggestions(options);
            } else {
                // Enough hits already - skip suggestion generation.
                spellingResult = new SpellingResult();
            }
            boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
            NamedList suggestions = toNamedList(shardRequest, spellingResult, q,
                    extendedResults, collate, isCorrectlySpelled);
            if (collate) {
                ModifiableSolrParams modParams = new ModifiableSolrParams(params);
                // SH: having both spellcheck.q and q set screws up collations for some queries, such as "java develope"
                modParams.remove(CommonParams.Q);

                //SH: Note that the collator runs a query against the DF specified field. Ideally it should
                //run the query against the spellchecker field but that's inaccessible here
                addCollationsToResponse(modParams, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap());
            }
            NamedList response = new SimpleOrderedMap();
            response.add("suggestions", suggestions);
            rb.rsp.add("spellcheck", response);

        } else {
            throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
                    "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
        }
    }
}
项目:SolrPlugins    文件:DiceSuggester.java   
/**
 * Returns suggestions for each token, optionally normalising them through
 * {@code suggestionAnalyzer} and merging the counts of lookup results that
 * collapse to the same corrected form. At most {@code options.count}
 * distinct corrections are kept per token.
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }
    SpellingResult res = new SpellingResult();
    CharsRef scratch = new CharsRef();

    for (Token currentToken : options.tokens) {
        // Reuse one CharsRef as a view over the token's buffer to avoid copying.
        scratch.chars = currentToken.buffer();
        scratch.offset = 0;
        scratch.length = currentToken.length();
        // The popularity flag is only forwarded for lookups other than the
        // WFST/analyzing suggesters.
        boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
                !(lookup instanceof WFSTCompletionLookup) &&
                !(lookup instanceof AnalyzingSuggester);

        // get more than the requested suggestions as a lot get collapsed by the corrections
        List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count * 10);
        if (suggestions == null || suggestions.size() == 0) {
            continue;
        }

        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            Collections.sort(suggestions);
        }

        // Merge lookup results whose analyzed/corrected form is identical,
        // summing their values; insertion order is preserved by the LinkedHashMap.
        final LinkedHashMap<String, Integer> lhm = new LinkedHashMap<String, Integer>();
        for (LookupResult lr : suggestions) {
            String suggestion = lr.key.toString();
            if(this.suggestionAnalyzer != null) {
                String correction = getAnalyzerResult(suggestion);
                // multiple could map to the same, so don't repeat suggestions
                if(!isStringNullOrEmpty(correction)){
                    if(lhm.containsKey(correction)){
                        lhm.put(correction, lhm.get(correction) + (int) lr.value);
                    }
                    else {
                        lhm.put(correction, (int) lr.value);
                    }
                }
            }
            else {
                lhm.put(suggestion, (int) lr.value);
            }

            // Stop once enough distinct corrections were collected.
            if(lhm.size() >= options.count){
                break;
            }
        }

        // sort by new doc frequency
        Map<String, Integer> orderedMap = null;
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR){
            // retain the sort order from above
            orderedMap = lhm;
        }
        else {
            // Re-order by merged value, descending, via a comparator over the map's values.
            orderedMap = new TreeMap<String, Integer>(new Comparator<String>() {
                @Override
                public int compare(String s1, String s2) {
                    return lhm.get(s2).compareTo(lhm.get(s1));
                }
            });
            orderedMap.putAll(lhm);
        }

        for(Map.Entry<String, Integer> entry: orderedMap.entrySet()){
            res.add(currentToken, entry.getKey(), entry.getValue());
        }

    }
    return res;
}
项目:SolrPlugins    文件:DiceMultipleCaseSuggester.java   
/**
 * Returns spelling suggestions for every token in {@code options.tokens},
 * combining lookup results across case variants of each token so that
 * differently-cased forms of the same suggestion are not reported twice.
 *
 * For each case variant the lookup results are merged case-insensitively:
 * the most frequent cased form is kept as the suggestion to emit, while the
 * weights of all case variants are summed for popularity ordering.
 *
 * @param options spelling options carrying the tokens and the suggest mode
 * @return a {@link SpellingResult} with the merged suggestions per token;
 *         {@code EMPTY_RESULT} if the lookup has not been built yet
 * @throws IOException if the underlying lookup fails
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
    LOG.debug("getSuggestions: " + options.tokens);
    if (lookup == null) {
        LOG.info("Lookup is null - invoke spellchecker.build first");
        return EMPTY_RESULT;
    }

    SpellingResult res = new SpellingResult();
    for (Token currentToken : options.tokens) {
        String tokenText = currentToken.toString();

        // we need to ensure that we combine matches for different cases, and take the most common
        // where multiple case versions exist
        final Hashtable<String, LookupResult> htSuggestions = new Hashtable<String, LookupResult>();
        final Hashtable<String, Integer> htSuggestionCounts = new Hashtable<String, Integer>();

        for(String sToken: generateCaseVariants(tokenText)){

            Token newToken = newToken(currentToken, sToken);
            List<LookupResult> tmpSuggestions = getLookupResults(options, newToken);
            if(tmpSuggestions != null){
                for(LookupResult lu: tmpSuggestions) {
                    // merge case-insensitively on the lower-cased suggestion text
                    final String key = lu.key.toString().toLowerCase();
                    LookupResult existing = htSuggestions.get(key);
                    if(existing != null) {
                        // replace if more frequent
                        if (lu.value > existing.value) {
                            htSuggestions.put(key, lu);
                        }
                        // always accumulate the combined weight across all case variants
                        htSuggestionCounts.put(key, htSuggestionCounts.get(key) + (int)lu.value);
                    }
                    else{
                        htSuggestions.put(key, lu);
                        htSuggestionCounts.put(key, (int)lu.value);
                    }
                }
            }
        }

        List<String> suggestions = new ArrayList<String>(htSuggestions.keySet());
        if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
            // alphabetical ordering when popularity ordering was not requested
            Collections.sort(suggestions);
        }
        else{
            Collections.sort(suggestions, new Comparator<String>() {
                public int compare(String sug1, String sug2) {
                    // Descending by accumulated weight. compareTo avoids the
                    // integer-overflow risk of the previous subtraction-based
                    // comparison (sug2Count - sug1Count), and matches the
                    // comparator style used elsewhere in this class.
                    return htSuggestionCounts.get(sug2).compareTo(htSuggestionCounts.get(sug1));
                }
            });
        }

        for (String match : suggestions) {
            // emit the most frequent cased form, weighted by its own value
            LookupResult lr = htSuggestions.get(match);
            res.add(currentToken, lr.key.toString(), (int)lr.value);
        }

    }
    return res;
}
项目:NYBC    文件:WordBreakSpellChecker.java   
/**
 * <p>
 * Suggests ways to split the passed-in term into several dictionary words.
 * The score of each suggestion equals the number of word breaks it required,
 * so a lower score is generally preferred over a higher score.
 * </p>
 * 
 * @param suggestMode
 *          - default = {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX}
 * @param sortMethod
 *          - default =
 *          {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
 * @return one or more arrays of words formed by breaking up the original term
 * @throws IOException If there is a low-level I/O error.
 */
public SuggestWord[][] suggestWordBreaks(Term term, int maxSuggestions,
    IndexReader ir, SuggestMode suggestMode,
    BreakSuggestionSortMethod sortMethod) throws IOException {
  if (maxSuggestions < 1) {
    return new SuggestWord[0][0];
  }
  // Fall back to the documented defaults when the caller passed null.
  if (suggestMode == null) {
    suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
  }
  if (sortMethod == null) {
    sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
  }

  // Rank candidate break-ups in a priority queue using the requested ordering.
  final int initialCapacity = Math.min(maxSuggestions, 10);
  final Comparator<SuggestWordArrayWrapper> ranking;
  if (sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY) {
    ranking = new LengthThenMaxFreqComparator();
  } else {
    ranking = new LengthThenSumFreqComparator();
  }
  Queue<SuggestWordArrayWrapper> queue = new PriorityQueue<SuggestWordArrayWrapper>(
      initialCapacity, ranking);

  // A term already present in the index needs no break-up in this mode.
  int originalFreq = ir.docFreq(term);
  if (originalFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) {
    return new SuggestWord[0][];
  }

  // SUGGEST_MORE_POPULAR requires each fragment to be at least as frequent
  // as the original term.
  int minFreq = minSuggestionFrequency;
  if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) {
    minFreq = (originalFreq == 0 ? 1 : originalFreq);
  }

  generateBreakUpSuggestions(term, ir, 1, maxSuggestions, minFreq,
      new SuggestWord[0], queue, 0, sortMethod);

  // Drain the queue back-to-front so the best suggestion ends up first.
  SuggestWord[][] result = new SuggestWord[queue.size()][];
  int slot = queue.size() - 1;
  while (slot >= 0) {
    result[slot--] = queue.remove().suggestWords;
  }

  return result;
}
项目:NYBC    文件:SpellCheckComponent.java   
// Entry point of the spellcheck search component: reads the spellcheck.*
// request parameters, runs the configured SolrSpellChecker over the query
// tokens, and attaches a "spellcheck" section to the response.
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  // Bail out unless the component is enabled and at least one checker exists.
  if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
    return;
  }
  boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
  String q = params.get(SPELLCHECK_Q);
  SolrSpellChecker spellChecker = getSpellChecker(params);
  Collection<Token> tokens = null;

  if (q != null) {
    //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
    tokens = getTokens(q, spellChecker.getQueryAnalyzer());
  } else {
    // No explicit spellcheck.q: fall back to the main query string.
    q = rb.getQueryString();
    if (q == null) {
      q = params.get(CommonParams.Q);
    }
    tokens = queryConverter.convert(q);
  }
  if (tokens != null && tokens.isEmpty() == false) {
    if (spellChecker != null) {
      // Gather the per-request spellcheck options.
      int count = params.getInt(SPELLCHECK_COUNT, 1);
      boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
      boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
      boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
      float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
      Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
      Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
      // Collect dictionary-specific (per-checker) parameter overrides.
      ModifiableSolrParams customParams = new ModifiableSolrParams();
      for (String checkerName : getDictionaryNames(params)) {
        customParams.add(getCustomParams(checkerName, params));
      }

      // Determine the hit count: prefer the value already logged for this
      // request, otherwise ask the ResponseBuilder directly.
      Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
      long hits = 0;
      if (hitsInteger == null) {
        hits = rb.getNumberDocumentsFound();
      } else {
        hits = hitsInteger.longValue();
      }
      SpellingResult spellingResult = null;
      // Only compute suggestions when the query matched few enough documents.
      if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
        // Map the request flags onto a Lucene SuggestMode; onlyMorePopular
        // takes precedence over alternativeTermCount.
        SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
        if (onlyMorePopular) {
          suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
        } else if (alternativeTermCount != null) {
          suggestMode = SuggestMode.SUGGEST_ALWAYS;
        }

        IndexReader reader = rb.req.getSearcher().getIndexReader();
        SpellingOptions options = new SpellingOptions(tokens, reader, count,
            alternativeTermCount, suggestMode, extendedResults, accuracy,
            customParams);
        spellingResult = spellChecker.getSuggestions(options);
      } else {
        // Too many hits: report an empty (i.e. "correctly spelled") result.
        spellingResult = new SpellingResult();
      }
      boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
      NamedList suggestions = toNamedList(shardRequest, spellingResult, q,
          extendedResults, collate, isCorrectlySpelled);
      if (collate) {
        addCollationsToResponse(params, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap());
      }
      // Attach the spellcheck section to the Solr response.
      NamedList response = new SimpleOrderedMap();
      response.add("suggestions", suggestions);
      rb.rsp.add("spellcheck", response);

    } else {
      // spellcheck.dictionary referenced a checker that was never configured.
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
  }
}
项目:NYBC    文件:IndexBasedSpellCheckerTest.java   
// Verifies extended-results mode of IndexBasedSpellChecker: a misspelled
// term gets exactly one suggestion with its document frequency, an unknown
// term gets none, and a correctly spelled term yields no suggestion entry.
@Test
public void testExtendedResults() throws Exception {
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  // Use a fresh, timestamped directory for the spelling index.
  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("documemt");
  // extendedResults=true, accuracy=0.5
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("documemt is null and it shouldn't be", suggestions != null);
  assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true);
  assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // A term that exists in the index should produce no suggestion map at all.
  spellOpts.tokens = queryConverter.convert("document");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    // Always release the searcher reference taken above.
    holder.decref();
  }
}
项目:NYBC    文件:IndexBasedSpellCheckerTest.java   
// Verifies that the spellchecker can be built from a standalone index at an
// alternate LOCATION (not the main Solr index): "flesh" corrects to "flash",
// unknown terms get no suggestions, and in-index terms yield no entry.
@Test
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  //create a standalone index
  File altIndexDir = new File(TEMP_DIR, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  // Flush and close the alternate index before handing it to the checker.
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  // LOCATION points the checker at the standalone index built above.
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("flesh");
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("flesh is null and it shouldn't be", suggestions != null);
  assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
  assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // A term present in the alternate index should yield no suggestion map.
  spellOpts.tokens = queryConverter.convert("Caroline");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    // Always release the searcher reference taken above.
    holder.decref();
  }
}
项目:search-core    文件:SpellCheckComponent.java   
// Entry point of the spellcheck search component: reads the spellcheck.*
// request parameters, runs the configured SolrSpellChecker over the query
// tokens, and attaches a "spellcheck" section to the response.
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  // Bail out unless the component is enabled and at least one checker exists.
  if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
    return;
  }
  boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
  String q = params.get(SPELLCHECK_Q);
  SolrSpellChecker spellChecker = getSpellChecker(params);
  Collection<Token> tokens = null;

  if (q != null) {
    //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
    tokens = getTokens(q, spellChecker.getQueryAnalyzer());
  } else {
    // No explicit spellcheck.q: fall back to the main query string.
    q = rb.getQueryString();
    if (q == null) {
      q = params.get(CommonParams.Q);
    }
    tokens = queryConverter.convert(q);
  }
  if (tokens != null && tokens.isEmpty() == false) {
    if (spellChecker != null) {
      // Gather the per-request spellcheck options.
      int count = params.getInt(SPELLCHECK_COUNT, 1);
      boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
      boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
      boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
      float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
      Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
      Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
      // Collect dictionary-specific (per-checker) parameter overrides.
      ModifiableSolrParams customParams = new ModifiableSolrParams();
      for (String checkerName : getDictionaryNames(params)) {
        customParams.add(getCustomParams(checkerName, params));
      }

      // Determine the hit count: prefer the value already logged for this
      // request, otherwise ask the ResponseBuilder directly.
      Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
      long hits = 0;
      if (hitsInteger == null) {
        hits = rb.getNumberDocumentsFound();
      } else {
        hits = hitsInteger.longValue();
      }
      SpellingResult spellingResult = null;
      // Only compute suggestions when the query matched few enough documents.
      if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
        // Map the request flags onto a Lucene SuggestMode; onlyMorePopular
        // takes precedence over alternativeTermCount.
        SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
        if (onlyMorePopular) {
          suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
        } else if (alternativeTermCount != null) {
          suggestMode = SuggestMode.SUGGEST_ALWAYS;
        }

        IndexReader reader = rb.req.getSearcher().getIndexReader();
        SpellingOptions options = new SpellingOptions(tokens, reader, count,
            alternativeTermCount, suggestMode, extendedResults, accuracy,
            customParams);
        spellingResult = spellChecker.getSuggestions(options);
      } else {
        // Too many hits: report an empty (i.e. "correctly spelled") result.
        spellingResult = new SpellingResult();
      }
      boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
      NamedList suggestions = toNamedList(shardRequest, spellingResult, q,
          extendedResults, collate, isCorrectlySpelled);
      if (collate) {
        addCollationsToResponse(params, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap());
      }
      // Attach the spellcheck section to the Solr response.
      NamedList response = new SimpleOrderedMap();
      response.add("suggestions", suggestions);
      rb.rsp.add("spellcheck", response);

    } else {
      // spellcheck.dictionary referenced a checker that was never configured.
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
  }
}
项目:search-core    文件:IndexBasedSpellCheckerTest.java   
// Verifies extended-results mode of IndexBasedSpellChecker: a misspelled
// term gets exactly one suggestion with its document frequency, an unknown
// term gets none, and a correctly spelled term yields no suggestion entry.
@Test
public void testExtendedResults() throws Exception {
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  // Use a fresh, timestamped directory for the spelling index.
  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("documemt");
  // extendedResults=true, accuracy=0.5
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("documemt is null and it shouldn't be", suggestions != null);
  assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true);
  assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // A term that exists in the index should produce no suggestion map at all.
  spellOpts.tokens = queryConverter.convert("document");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    // Always release the searcher reference taken above.
    holder.decref();
  }
}
项目:search-core    文件:IndexBasedSpellCheckerTest.java   
// Verifies that the spellchecker can be built from a standalone index at an
// alternate LOCATION (not the main Solr index): "flesh" corrects to "flash",
// unknown terms get no suggestions, and in-index terms yield no entry.
@Test
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = new File(TEMP_DIR, "spellingIdx" + new Date().getTime());
  //create a standalone index
  File altIndexDir = new File(TEMP_DIR, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir);
  IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT))
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  // Flush and close the alternate index before handing it to the checker.
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  // LOCATION points the checker at the standalone index built above.
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  RefCounted<SolrIndexSearcher> holder = core.getSearcher();
  SolrIndexSearcher searcher = holder.get();
  try {
  checker.build(core, searcher);

  IndexReader reader = searcher.getIndexReader();
  Collection<Token> tokens = queryConverter.convert("flesh");
  SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
  SpellingResult result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  //should be lowercased, b/c we are using a lowercasing analyzer
  Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("flesh is null and it shouldn't be", suggestions != null);
  assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
  Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
  assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
  assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

  //test something not in the spell checker
  spellOpts.tokens = queryConverter.convert("super");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions size should be 0", suggestions.size()==0);

  // A term present in the alternate index should yield no suggestion map.
  spellOpts.tokens = queryConverter.convert("Caroline");
  result = checker.getSuggestions(spellOpts);
  assertTrue("result is null and it shouldn't be", result != null);
  suggestions = result.get(spellOpts.tokens.iterator().next());
  assertTrue("suggestions is not null and it should be", suggestions == null);
  } finally {
    // Always release the searcher reference taken above.
    holder.decref();
  }
}
项目:read-open-source-code    文件:WordBreakSpellChecker.java   
/**
 * <p>
 * Generate suggestions by breaking the passed-in term into multiple words.
 * The scores returned are equal to the number of word breaks needed so a
 * lower score is generally preferred over a higher score.
 * </p>
 * 
 * @param suggestMode
 *          - default = {@link SuggestMode#SUGGEST_WHEN_NOT_IN_INDEX}
 * @param sortMethod
 *          - default =
 *          {@link BreakSuggestionSortMethod#NUM_CHANGES_THEN_MAX_FREQUENCY}
 * @return one or more arrays of words formed by breaking up the original term
 * @throws IOException If there is a low-level I/O error.
 */
public SuggestWord[][] suggestWordBreaks(Term term, int maxSuggestions,
    IndexReader ir, SuggestMode suggestMode,
    BreakSuggestionSortMethod sortMethod) throws IOException {
  if (maxSuggestions < 1) {
    return new SuggestWord[0][0];
  }
  // Apply the documented defaults when the caller passed null.
  if (suggestMode == null) {
    suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
  }
  if (sortMethod == null) {
    sortMethod = BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY;
  }

  // Rank candidate break-ups in a priority queue using the requested ordering.
  int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
  Comparator<SuggestWordArrayWrapper> queueComparator = sortMethod == BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY ? new LengthThenMaxFreqComparator()
      : new LengthThenSumFreqComparator();
  Queue<SuggestWordArrayWrapper> suggestions = new PriorityQueue<SuggestWordArrayWrapper>(
      queueInitialCapacity, queueComparator);

  // A term already present in the index needs no break-up in this mode.
  int origFreq = ir.docFreq(term);
  if (origFreq > 0 && suggestMode == SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX) {
    return new SuggestWord[0][];
  }

  // SUGGEST_MORE_POPULAR requires each fragment to be at least as frequent
  // as the original term.
  int useMinSuggestionFrequency = minSuggestionFrequency;
  if (suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) {
    useMinSuggestionFrequency = (origFreq == 0 ? 1 : origFreq);
  }

  generateBreakUpSuggestions(term, ir, 1, maxSuggestions,
      useMinSuggestionFrequency, new SuggestWord[0], suggestions, 0,
      sortMethod);

  // Drain the queue back-to-front so the best suggestion ends up first.
  SuggestWord[][] suggestionArray = new SuggestWord[suggestions.size()][];
  for (int i = suggestions.size() - 1; i >= 0; i--) {
    suggestionArray[i] = suggestions.remove().suggestWords;
  }

  return suggestionArray;
}