Java class org.apache.lucene.util.CharsRefBuilder example source code
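
Most of the snippets below use CharsRefBuilder as a reusable scratch buffer for decoding UTF-8 term bytes (a BytesRef) into Java chars. A minimal orientation sketch of that pattern, assuming a recent Lucene release where the class lives in org.apache.lucene.util:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class CharsRefBuilderBasics {
  public static void main(String[] args) {
    // UTF-8 encoded term bytes, as a TermsEnum would return them.
    BytesRef utf8Term = new BytesRef("café");

    // Decode the UTF-8 bytes into the builder's char buffer.
    CharsRefBuilder spare = new CharsRefBuilder();
    spare.copyUTF8Bytes(utf8Term);

    // get() exposes the builder's internal buffer as a CharsRef view;
    // toString() materializes an independent String.
    CharsRef view = spare.get();
    System.out.println(view.length + " chars: " + spare.toString());
  }
}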

Project: search    File: TestLimitTokenPositionFilter.java
public void testMaxPosition3WithSynomyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = new MockTokenizer(new StringReader("one two three four five"), MockTokenizer.WHITESPACE, false);
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();
    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
Project: elasticsearch_my    File: XAnalyzingSuggester.java
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for(int i=0;i<output2.length;i++) {
      if (output2.bytes[output2.offset+i] == payloadSep) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }

  return result;
}
Project: elasticsearch_my    File: ContextMappings.java
/**
 * Wraps a {@link CompletionQuery} with context queries
 *
 * @param query base completion query to wrap
 * @param queryContexts a map of context mapping name and collected query contexts
 * @return a context-enabled query
 */
public ContextQuery toContextQuery(CompletionQuery query, Map<String, List<ContextMapping.InternalQueryContext>> queryContexts) {
    ContextQuery typedContextQuery = new ContextQuery(query);
    if (queryContexts.isEmpty() == false) {
        CharsRefBuilder scratch = new CharsRefBuilder();
        scratch.grow(1);
        for (int typeId = 0; typeId < contextMappings.size(); typeId++) {
            scratch.setCharAt(0, (char) typeId);
            scratch.setLength(1);
            ContextMapping mapping = contextMappings.get(typeId);
            List<ContextMapping.InternalQueryContext> internalQueryContext = queryContexts.get(mapping.name());
            if (internalQueryContext != null) {
                for (ContextMapping.InternalQueryContext context : internalQueryContext) {
                    scratch.append(context.context);
                    typedContextQuery.addContext(scratch.toCharsRef(), context.boost, !context.isPrefix);
                    scratch.setLength(1);
                }
            }
        }
    }
    return typedContextQuery;
}
Project: elasticsearch_my    File: TermSuggester.java
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare)
        throws IOException {
    DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    );
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode()
        );
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}
Project: elasticsearch_my    File: TermVectorsResponse.java
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
Project: elasticsearch_my    File: CustomSuggester.java
@Override
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name, CustomSuggestionsContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    // Get the suggestion context
    String text = suggestion.getText().utf8ToString();

    // create two suggestions with 12 and 123 appended
    Suggest.Suggestion<Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option>> response = new Suggest.Suggestion<>(name, suggestion.getSize());

    String firstSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text, suggestion.getField(), suggestion.options.get("suffix"), "12");
    Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry12 = new Suggest.Suggestion.Entry<>(new Text(firstSuggestion), 0, text.length() + 2);
    response.addTerm(resultEntry12);

    String secondSuggestion = String.format(Locale.ROOT, "%s-%s-%s-%s", text, suggestion.getField(), suggestion.options.get("suffix"), "123");
    Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> resultEntry123 = new Suggest.Suggestion.Entry<>(new Text(secondSuggestion), 0, text.length() + 3);
    response.addTerm(resultEntry123);

    return response;
}
Project: lams    File: SynonymMap.java
/** Sugar: just joins the provided terms with {@link
 *  SynonymMap#WORD_SEPARATOR}.  reuse and its chars
 *  must not be null. */
public static CharsRef join(String[] words, CharsRefBuilder reuse) {
  int upto = 0;
  char[] buffer = reuse.chars();
  for (String word : words) {
    final int wordLen = word.length();
    final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
    if (needed > buffer.length) {
      reuse.grow(needed);
      buffer = reuse.chars();
    }
    if (upto > 0) {
      buffer[upto++] = SynonymMap.WORD_SEPARATOR;
    }

    word.getChars(0, wordLen, buffer, upto);
    upto += wordLen;
  }
  reuse.setLength(upto);
  return reuse.get();
}
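
As the TestLimitTokenPositionFilter snippet at the top of this page already shows, join is typically used to build the multi-word side of a synonym entry. A condensed, hedged sketch of that usage:

static SynonymMap buildWifiSynonyms() throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(true); // dedup = true
  CharsRefBuilder scratch = new CharsRefBuilder();
  // "wi fi" is joined with SynonymMap.WORD_SEPARATOR into a single multi-word input.
  SynonymMap.Builder.join(new String[]{"wi", "fi"}, scratch);
  builder.add(scratch.get(), new CharsRef("wifi"), true);   // true: keep the original form
  return builder.build();
}

Because add copies its arguments into the map, the same CharsRefBuilder can be reused for further join calls, exactly as the first snippet on this page does.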
Project: lams    File: QueryAutoStopWordAnalyzer.java
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
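
A hedged usage sketch for this constructor, assuming a Lucene version with the no-argument StandardAnalyzer constructor; the Directory handed in is the caller's responsibility:

static Analyzer buildAutoStopWordAnalyzer(Directory directory) throws IOException {
  // Treat any "body" term occurring in more than 1000 documents as a stopword.
  IndexReader reader = DirectoryReader.open(directory);
  return new QueryAutoStopWordAnalyzer(
      new StandardAnalyzer(), reader, Collections.singleton("body"), 1000);
}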
Project: Elasticsearch    File: XAnalyzingSuggester.java
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for(int i=0;i<output2.length;i++) {
      if (output2.bytes[output2.offset+i] == payloadSep) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }

  return result;
}
Project: Elasticsearch    File: TermSuggester.java
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    );
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode()
        );
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}
Project: Elasticsearch    File: TermVectorsResponse.java
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
Project: solrplugins    File: BidirectionalFacetResponseBuilder.java
public LocalEnv(int offset, int limit, int startTermIndex, int adjust, int targetIdx, int nTerms, Predicate<BytesRef> termFilter,
    int mincount, int[] counts, CharsRefBuilder charsRef, boolean extend, SortedSetDocValues si,
    SolrIndexSearcher searcher, List<Entry<LeafReader, Bits>> leaves, String fieldName, T ft, NamedList res) {
  super(offset, limit, targetIdx, mincount, fieldName, ft, res);
  if (startTermIndex == -1) {
    // weird case where missing is counted at counts[0].
    this.startTermOrd = 0;
    this.endTermOrd = nTerms - 1;
  } else if (startTermIndex >= 0) {
    this.startTermOrd = startTermIndex;
    this.endTermOrd = startTermIndex + nTerms;
  } else {
    throw new IllegalStateException();
  }
  this.startTermIndex = startTermIndex;
  this.adjust = adjust;
  this.nTerms = nTerms;
  this.termFilter = termFilter;
  this.counts = counts;
  this.charsRef = charsRef;
  this.extend = extend;
  this.si = si;
  this.searcher = searcher;
  this.leaves = leaves;
}
Project: Alix    File: MoreLikeThis.java
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap
 *          a Map of terms and their frequencies
 * @param vector
 *          List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException
{
  final TermsEnum termsEnum = vector.iterator();
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    final int freq = (int) termsEnum.totalTermFreq();

    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    }
    else {
      cnt.x += freq;
    }
  }
}
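
For context (not part of the excerpt above), the Terms vector passed to this method typically comes from a stored term vector. A hedged, self-contained variant of the same counting loop, using a plain Map<String, Integer> instead of MoreLikeThis's internal Int class; the field is assumed to have been indexed with term vectors enabled:

static Map<String, Integer> termVectorFrequencies(IndexReader reader, int docId) throws IOException {
  Map<String, Integer> freqs = new HashMap<>();
  Terms vector = reader.getTermVector(docId, "body");
  if (vector == null) {
    return freqs;
  }
  TermsEnum termsEnum = vector.iterator();
  CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while ((text = termsEnum.next()) != null) {
    // Decode the UTF-8 term bytes and accumulate the in-document frequency.
    spare.copyUTF8Bytes(text);
    freqs.merge(spare.toString(), (int) termsEnum.totalTermFreq(), Integer::sum);
  }
  return freqs;
}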
Project: es-token-plugin    File: TransportAllTermsShardAction.java
protected static void getTerms(AllTermsShardRequest request, List<String> terms, List<LeafReaderContext> leaves) {
    List<TermsEnum> termIters = getTermsEnums(request, leaves);
    CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef lastTerm = null;
    int[] exhausted = new int[termIters.size()];
    for (int i = 0; i < exhausted.length; i++) {
        exhausted[i] = 0;
    }
    try {
        lastTerm = findSmallestTermAfter(request, termIters, lastTerm, exhausted);

        if (lastTerm == null) {
            return;
        }
        findNMoreTerms(request, terms, termIters, spare, lastTerm, exhausted);
    } catch (IOException e) {
        // ignored; whatever terms were collected before the failure are returned
    }
}
Project: es-token-plugin    File: TransportAllTermsShardAction.java
protected static void findNMoreTerms(AllTermsShardRequest request, List<String> terms, List<TermsEnum> termIters, CharsRefBuilder spare,
                                     BytesRef lastTerm, int[] exhausted) {
    if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
        spare.copyUTF8Bytes(lastTerm);
        terms.add(spare.toString());
    }
    BytesRef bytesRef = new BytesRef(lastTerm.utf8ToString());
    lastTerm = bytesRef;
    while (terms.size() < request.size() && lastTerm != null) {
        moveIterators(exhausted, termIters, lastTerm);
        lastTerm = findMinimum(exhausted, termIters);
        if (lastTerm != null) {
            if (getDocFreq(termIters, lastTerm, exhausted) >= request.minDocFreq()) {
                spare.copyUTF8Bytes(lastTerm);
                terms.add(spare.toString());
            }
        }
    }
}
Project: search    File: MoreLikeThis.java
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum termsEnum = vector.iterator(null);
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    final int freq = (int) termsEnum.totalTermFreq();

    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    } else {
      cnt.x += freq;
    }
  }
}
Project: search    File: AnalyzingSuggester.java
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for(int i=0;i<output2.length;i++) {
      if (output2.bytes[output2.offset+i] == PAYLOAD_SEP) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    spare.grow(sepIndex);
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.grow(output2.length);
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }

  return result;
}
Project: search    File: FSTCompletionLookup.java
@Override
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean higherWeightsFirst, int num) {
  if (contexts != null) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }
  final List<Completion> completions;
  if (higherWeightsFirst) {
    completions = higherWeightsCompletion.lookup(key, num);
  } else {
    completions = normalCompletion.lookup(key, num);
  }

  final ArrayList<LookupResult> results = new ArrayList<>(completions.size());
  CharsRefBuilder spare = new CharsRefBuilder();
  for (Completion c : completions) {
    spare.copyUTF8Bytes(c.utf8);
    results.add(new LookupResult(spare.toString(), c.bucket));
  }
  return results;
}
Project: search    File: SynonymMap.java
/** Sugar: just joins the provided terms with {@link
 *  SynonymMap#WORD_SEPARATOR}.  reuse and its chars
 *  must not be null. */
public static CharsRef join(String[] words, CharsRefBuilder reuse) {
  int upto = 0;
  char[] buffer = reuse.chars();
  for (String word : words) {
    final int wordLen = word.length();
    final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
    if (needed > buffer.length) {
      reuse.grow(needed);
      buffer = reuse.chars();
    }
    if (upto > 0) {
      buffer[upto++] = SynonymMap.WORD_SEPARATOR;
    }

    word.getChars(0, wordLen, buffer, upto);
    upto += wordLen;
  }
  reuse.setLength(upto);
  return reuse.get();
}
Project: search    File: QueryAutoStopWordAnalyzer.java
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
Project: search    File: TestIndexWriterUnicode.java
public void testRandomUnicodeStrings() throws Throwable {
  char[] buffer = new char[20];
  char[] expected = new char[20];

  CharsRefBuilder utf16 = new CharsRefBuilder();

  int num = atLeast(100000);
  for (int iter = 0; iter < num; iter++) {
    boolean hasIllegal = fillUnicode(buffer, expected, 0, 20);

    BytesRef utf8 = new BytesRef(CharBuffer.wrap(buffer, 0, 20));
    if (!hasIllegal) {
      byte[] b = new String(buffer, 0, 20).getBytes(StandardCharsets.UTF_8);
      assertEquals(b.length, utf8.length);
      for(int i=0;i<b.length;i++)
        assertEquals(b[i], utf8.bytes[i]);
    }

    utf16.copyUTF8Bytes(utf8.bytes, 0, utf8.length);
    assertEquals(utf16.length(), 20);
    for(int i=0;i<20;i++)
      assertEquals(expected[i], utf16.charAt(i));
  }
}
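
The same round trip in isolation, as a hedged sketch in the same test-class style: encode a String to UTF-8 through BytesRef, decode it back with copyUTF8Bytes, and compare.

public void testUtf8RoundTrip() {
  String original = "zürich \uD83D\uDE00";                 // includes a surrogate pair
  BytesRef utf8 = new BytesRef(original);                   // encodes to UTF-8
  CharsRefBuilder utf16 = new CharsRefBuilder();
  utf16.copyUTF8Bytes(utf8.bytes, utf8.offset, utf8.length);
  assertTrue(original.contentEquals(utf16.get()));          // identical char sequence
}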
Project: cc-analysis    File: CcWordsFilterTest.java
private CharsRef analyze(Analyzer analyzer, String text) throws IOException {
    CharsRefBuilder charsRefBuilder = new CharsRefBuilder();
    try (TokenStream ts = analyzer.tokenStream("", text)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            charsRefBuilder.grow(charsRefBuilder.length() + length + 1); /* current + word + separator */
            if (charsRefBuilder.length() > 0) {
                charsRefBuilder.append(CcWordSet.WORD_SEPARATOR);
            }
            charsRefBuilder.append(termAtt);
        }
        ts.end();
    }
    if (charsRefBuilder.length() == 0) {
        return null;
    }
    charsRefBuilder.append(CcWordSet.WORD_END);
    return charsRefBuilder.get();
}
Project: read-open-source-code    File: MoreLikeThis.java
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
  final TermsEnum termsEnum = vector.iterator(null);
  final CharsRefBuilder spare = new CharsRefBuilder();
  BytesRef text;
  while((text = termsEnum.next()) != null) {
    spare.copyUTF8Bytes(text);
    final String term = spare.toString();
    if (isNoiseWord(term)) {
      continue;
    }
    final int freq = (int) termsEnum.totalTermFreq();

    // increment frequency
    Int cnt = termFreqMap.get(term);
    if (cnt == null) {
      cnt = new Int();
      termFreqMap.put(term, cnt);
      cnt.x = freq;
    } else {
      cnt.x += freq;
    }
  }
}
Project: read-open-source-code    File: AnalyzingSuggester.java
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) {
  LookupResult result;
  if (hasPayloads) {
    int sepIndex = -1;
    for(int i=0;i<output2.length;i++) {
      if (output2.bytes[output2.offset+i] == PAYLOAD_SEP) {
        sepIndex = i;
        break;
      }
    }
    assert sepIndex != -1;
    spare.grow(sepIndex);
    final int payloadLen = output2.length - sepIndex - 1;
    spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex);
    BytesRef payload = new BytesRef(payloadLen);
    System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen);
    payload.length = payloadLen;
    result = new LookupResult(spare.toString(), decodeWeight(output1), payload);
  } else {
    spare.grow(output2.length);
    spare.copyUTF8Bytes(output2);
    result = new LookupResult(spare.toString(), decodeWeight(output1));
  }

  return result;
}
Project: read-open-source-code    File: FSTCompletionLookup.java
@Override
public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean higherWeightsFirst, int num) {
  if (contexts != null) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }
  final List<Completion> completions;
  if (higherWeightsFirst) {
    completions = higherWeightsCompletion.lookup(key, num);
  } else {
    completions = normalCompletion.lookup(key, num);
  }

  final ArrayList<LookupResult> results = new ArrayList<>(completions.size());
  CharsRefBuilder spare = new CharsRefBuilder();
  for (Completion c : completions) {
    spare.copyUTF8Bytes(c.utf8);
    results.add(new LookupResult(spare.toString(), c.bucket));
  }
  return results;
}
Project: read-open-source-code    File: SynonymMap.java
/** Sugar: just joins the provided terms with {@link
 *  SynonymMap#WORD_SEPARATOR}.  reuse and its chars
 *  must not be null. */
public static CharsRef join(String[] words, CharsRefBuilder reuse) {
  int upto = 0;
  char[] buffer = reuse.chars();
  for (String word : words) {
    final int wordLen = word.length();
    final int needed = (0 == upto ? wordLen : 1 + upto + wordLen); // Add 1 for WORD_SEPARATOR
    if (needed > buffer.length) {
      reuse.grow(needed);
      buffer = reuse.chars();
    }
    if (upto > 0) {
      buffer[upto++] = SynonymMap.WORD_SEPARATOR;
    }

    word.getChars(0, wordLen, buffer, upto);
    upto += wordLen;
  }
  reuse.setLength(upto);
  return reuse.get();
}
Project: read-open-source-code    File: QueryAutoStopWordAnalyzer.java
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
Project: elasticsearch_my    File: CompletionSuggester.java
@Override
protected Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> innerExecute(String name,
        final CompletionSuggestionContext suggestionContext, final IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    if (suggestionContext.getFieldType() != null) {
        final CompletionFieldMapper.CompletionFieldType fieldType = suggestionContext.getFieldType();
        CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
        spare.copyUTF8Bytes(suggestionContext.getText());
        CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(
            new Text(spare.toString()), 0, spare.length());
        completionSuggestion.addTerm(completionSuggestEntry);
        TopSuggestDocsCollector collector = new TopDocumentsCollector(suggestionContext.getSize());
        suggest(searcher, suggestionContext.toQuery(), collector);
        int numResult = 0;
        for (TopSuggestDocs.SuggestScoreDoc suggestScoreDoc : collector.get().scoreLookupDocs()) {
            TopDocumentsCollector.SuggestDoc suggestDoc = (TopDocumentsCollector.SuggestDoc) suggestScoreDoc;
            // collect contexts
            Map<String, Set<CharSequence>> contexts = Collections.emptyMap();
            if (fieldType.hasContextMappings() && suggestDoc.getContexts().isEmpty() == false) {
                contexts = fieldType.getContextMappings().getNamedContexts(suggestDoc.getContexts());
            }
            if (numResult++ < suggestionContext.getSize()) {
                CompletionSuggestion.Entry.Option option = new CompletionSuggestion.Entry.Option(suggestDoc.doc,
                    new Text(suggestDoc.key.toString()), suggestDoc.score, contexts);
                completionSuggestEntry.addOption(option);
            } else {
                break;
            }
        }
        return completionSuggestion;
    }
    return null;
}
Project: elasticsearch_my    File: TermSuggester.java
private static List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
    final List<Token> result = new ArrayList<>();
    final String field = suggestion.getField();
    DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field,
            new DirectCandidateGenerator.TokenConsumer() {
        @Override
        public void nextToken() {
            Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
            result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset()));
        }
    }, spare);
    return result;
}
Project: elasticsearch_my    File: DirectCandidateGenerator.java
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
    if (preFilter == null) {
        return term;
    }
    final BytesRefBuilder result = byteSpare;
    analyze(preFilter, term, field, new TokenConsumer() {

        @Override
        public void nextToken() throws IOException {
            this.fillBytesRef(result);
        }
    }, spare);
    return result.get();
}
Project: elasticsearch_my    File: DirectCandidateGenerator.java
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
        throws IOException {
    spare.copyUTF8Bytes(toAnalyze);
    CharsRef charsRef = spare.get();
    try (TokenStream ts = analyzer.tokenStream(
                              field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
         return analyze(ts, consumer);
    }
}
Project: elasticsearch_my    File: Suggester.java
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
            execute(String name, T suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    // #3469 We want to ignore empty shards

    if (searcher.getIndexReader().numDocs() == 0) {
        return null;
    }
    return innerExecute(name, suggestion, searcher, spare);
}
Project: elasticsearch_my    File: XMoreLikeThis.java
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        if (isSkipTerm(fieldName, term)) {
            continue;
        }

        final PostingsEnum docs = termsEnum.postings(null);
        int freq = 0;
        while(docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            freq += docs.freq();
        }

        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
Project: elasticsearch_my    File: TermVectorsResponse.java
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
Project: lams    File: DutchAnalyzer.java
/**
 * @deprecated Use {@link #DutchAnalyzer(CharArraySet,CharArraySet,CharArrayMap)}
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  setVersion(matchVersion);
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion, stemExclusionTable));
  if (stemOverrideDict.isEmpty() || !matchVersion.onOrAfter(Version.LUCENE_3_1)) {
    this.stemdict = null;
    this.origStemdict = CharArrayMap.unmodifiableMap(CharArrayMap.copy(matchVersion, stemOverrideDict));
  } else {
    this.origStemdict = null;
    // we don't need to ignore case here since we lowercase in this analyzer anyway
    StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false);
    CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator();
    CharsRefBuilder spare = new CharsRefBuilder();
    while (iter.hasNext()) {
      char[] nextKey = iter.nextKey();
      spare.copyChars(nextKey, 0, nextKey.length);
      builder.add(spare.get(), iter.currentValue());
    }
    try {
      this.stemdict = builder.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  }
}
Project: lams    File: SynonymMap.java
/** Sugar: analyzes the text with the analyzer and
 *  separates by {@link SynonymMap#WORD_SEPARATOR}.
 *  reuse and its chars must not be null. */
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
  try (TokenStream ts = analyzer.tokenStream("", text)) {
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    ts.reset();
    reuse.clear();
    while (ts.incrementToken()) {
      int length = termAtt.length();
      if (length == 0) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
      }
      if (posIncAtt.getPositionIncrement() != 1) {
        throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
      }
      reuse.grow(reuse.length() + length + 1); /* current + word + separator */
      int end = reuse.length();
      if (reuse.length() > 0) {
        reuse.setCharAt(end++, SynonymMap.WORD_SEPARATOR);
        reuse.setLength(reuse.length() + 1);
      }
      System.arraycopy(termAtt.buffer(), 0, reuse.chars(), end, length);
      reuse.setLength(reuse.length() + length);
    }
    ts.end();
  }
  if (reuse.length() == 0) {
    throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
  }
  return reuse.get();
}
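
A hedged sketch of how this analyze method is typically used from inside a SynonymMap.Parser subclass, for example when registering a rule like "i-pod => ipod"; the surrounding parser class and its analyzer are assumed:

// Inside a SynonymMap.Parser subclass; registers the rule "i-pod => ipod".
void addIpodRule() throws IOException {
  CharsRefBuilder inputBuf = new CharsRefBuilder();
  CharsRefBuilder outputBuf = new CharsRefBuilder();
  CharsRef input = analyze("i-pod", inputBuf);    // analyzed, words joined by WORD_SEPARATOR
  CharsRef output = analyze("ipod", outputBuf);
  add(input, output, true);                       // true: keep the original form as well
}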
Project: lams    File: WordnetSynonymParser.java
@Override
public void parse(Reader in) throws IOException, ParseException {
  LineNumberReader br = new LineNumberReader(in);
  try {
    String line = null;
    String lastSynSetID = "";
    CharsRef synset[] = new CharsRef[8];
    int synsetSize = 0;

    while ((line = br.readLine()) != null) {
      String synSetID = line.substring(2, 11);

      if (!synSetID.equals(lastSynSetID)) {
        addInternal(synset, synsetSize);
        synsetSize = 0;
      }

      if (synset.length <= synsetSize+1) {
        synset = Arrays.copyOf(synset, synset.length * 2);
      }

      synset[synsetSize] = parseSynonym(line, new CharsRefBuilder());
      synsetSize++;
      lastSynSetID = synSetID;
    }

    // final synset in the file
    addInternal(synset, synsetSize);
  } catch (IllegalArgumentException e) {
    ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
    ex.initCause(e);
    throw ex;
  } finally {
    br.close();
  }
}
Project: Elasticsearch    File: TermSuggester.java
private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
    final List<Token> result = new ArrayList<>();
    final String field = suggestion.getField();
    SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() {
        @Override
        public void nextToken() {
            Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
            result.add(new Token(term, offsetAttr.startOffset(), offsetAttr.endOffset())); 
        }
    }, spare);
    return result;
}
Project: Elasticsearch    File: DirectCandidateGenerator.java
protected BytesRef preFilter(final BytesRef term, final CharsRefBuilder spare, final BytesRefBuilder byteSpare) throws IOException {
    if (preFilter == null) {
        return term;
    }
    final BytesRefBuilder result = byteSpare;
    SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {

        @Override
        public void nextToken() throws IOException {
            this.fillBytesRef(result);
        }
    }, spare);
    return result.get();
}
Project: Elasticsearch    File: Suggester.java
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>>
    execute(String name, T suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    // #3469 We want to ignore empty shards

    if (searcher.getIndexReader().numDocs() == 0) {
        return null;
    }
    return innerExecute(name, suggestion, searcher, spare);
}