Java 类org.apache.lucene.analysis.compound.hyphenation.HyphenationTree 实例源码

项目:search    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks dictionary-backed decompounding with the Danish pattern file
 * {@code da_UTF8.xml}: the expected output keeps the compound "læsehest"
 * and then lists its dictionary parts "læse" and "hest" with a position
 * increment of 0.
 */
public void testHyphenationCompoundWordsDA() throws Exception {
  final CharArraySet dict = makeDictionary("læse", "hest");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"),
      MockTokenizer.WHITESPACE,
      false);
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
      false);

  // Plain words advance the position by one; the two subwords stack on the compound.
  final String[] expectedTerms = {
      "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"
  };
  final int[] expectedPosIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:search    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks decompounding of "basketballkurv" with {@code onlyLongestMatch}
 * enabled and a maximum subword size of 40. The dictionary contains
 * "basket", but the expected output only lists "basketball", "ball" and
 * "kurv" after the compound itself.
 * <p>
 * Note: despite the "DE" in the method name, the pattern file loaded here
 * is {@code da_UTF8.xml}.
 */
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  final CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("basketballkurv"),
      MockTokenizer.WHITESPACE,
      false);

  // the word basket will not be added due to the longest match option
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      40,
      true);

  final String[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
  final int[] expectedPosIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:NYBC    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks dictionary-backed decompounding with the Danish pattern file
 * {@code da_UTF8.xml}: the expected output keeps the compound "læsehest"
 * and then lists its dictionary parts "læse" and "hest" with a position
 * increment of 0.
 */
public void testHyphenationCompoundWordsDA() throws Exception {
  final CharArraySet dict = makeDictionary("læse", "hest");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"),
      MockTokenizer.WHITESPACE,
      false);
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      TEST_VERSION_CURRENT,
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
      false);

  // Plain words advance the position by one; the two subwords stack on the compound.
  final String[] expectedTerms = {
      "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"
  };
  final int[] expectedPosIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:NYBC    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks decompounding of "basketballkurv" with {@code onlyLongestMatch}
 * enabled and a maximum subword size of 40. The dictionary contains
 * "basket", but the expected output only lists "basketball", "ball" and
 * "kurv" after the compound itself.
 * <p>
 * Note: despite the "DE" in the method name, the pattern file loaded here
 * is {@code da_UTF8.xml}.
 */
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  final CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("basketballkurv"),
      MockTokenizer.WHITESPACE,
      false);

  // the word basket will not be added due to the longest match option
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      TEST_VERSION_CURRENT,
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      40,
      true);

  final String[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
  final int[] expectedPosIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks dictionary-backed decompounding with the Danish pattern file
 * {@code da_UTF8.xml}: the expected output keeps the compound "læsehest"
 * and then lists its dictionary parts "læse" and "hest" with a position
 * increment of 0.
 */
public void testHyphenationCompoundWordsDA() throws Exception {
  final CharArraySet dict = makeDictionary("læse", "hest");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("min veninde som er lidt af en læsehest"),
      MockTokenizer.WHITESPACE,
      false);
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      TEST_VERSION_CURRENT,
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE,
      false);

  // Plain words advance the position by one; the two subwords stack on the compound.
  final String[] expectedTerms = {
      "min", "veninde", "som", "er", "lidt", "af", "en", "læsehest", "læse", "hest"
  };
  final int[] expectedPosIncrements = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestCompoundWordTokenFilter.java   
/**
 * Checks decompounding of "basketballkurv" with {@code onlyLongestMatch}
 * enabled and a maximum subword size of 40. The dictionary contains
 * "basket", but the expected output only lists "basketball", "ball" and
 * "kurv" after the compound itself.
 * <p>
 * Note: despite the "DE" in the method name, the pattern file loaded here
 * is {@code da_UTF8.xml}.
 */
public void testHyphenationCompoundWordsDELongestMatch() throws Exception {
  final CharArraySet dict = makeDictionary("basketball", "basket", "ball", "kurv");

  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  final MockTokenizer tokenizer = new MockTokenizer(
      new StringReader("basketballkurv"),
      MockTokenizer.WHITESPACE,
      false);

  // the word basket will not be added due to the longest match option
  final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
      TEST_VERSION_CURRENT,
      tokenizer,
      hyphenator,
      dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      40,
      true);

  final String[] expectedTerms = { "basketballkurv", "basketball", "ball", "kurv" };
  final int[] expectedPosIncrements = { 1, 0, 0, 0 };
  assertTokenStreamContents(tf, expectedTerms, expectedPosIncrements);
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with the given dictionary, using
 * {@code DEFAULT_MIN_WORD_SIZE}, {@code DEFAULT_MIN_SUBWORD_SIZE} and
 * {@code DEFAULT_MAX_SUBWORD_SIZE} for the size limits and {@code false}
 * for {@code onlyLongestMatch}. All work is delegated to the
 * eight-argument constructor.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param dictionary the dictionary forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary) {
  this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
      DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter from a full set of explicit arguments. Everything
 * except {@code hyphenator} is passed to the superclass constructor;
 * {@code hyphenator} is stored on this instance.
 *
 * @param matchVersion the version value passed to the superclass
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree kept by this filter
 * @param dictionary the dictionary passed to the superclass
 * @param minWordSize the minimum word size passed to the superclass
 * @param minSubwordSize the minimum subword size passed to the superclass
 * @param maxSubwordSize the maximum subword size passed to the superclass
 * @param onlyLongestMatch the longest-match flag passed to the superclass
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet,int,int,int,boolean)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter without a dictionary: delegates to the eight-argument
 * constructor with a {@code null} dictionary and {@code false} for
 * {@code onlyLongestMatch}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,int,int,int)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
    int maxSubwordSize) {
  this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with only a hyphenation tree: delegates to the
 * six-argument constructor using {@code DEFAULT_MIN_WORD_SIZE},
 * {@code DEFAULT_MIN_SUBWORD_SIZE} and {@code DEFAULT_MAX_SUBWORD_SIZE}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator) {
  this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
      DEFAULT_MAX_SUBWORD_SIZE);
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with the given dictionary, using
 * {@code DEFAULT_MIN_WORD_SIZE}, {@code DEFAULT_MIN_SUBWORD_SIZE} and
 * {@code DEFAULT_MAX_SUBWORD_SIZE} for the size limits and {@code false}
 * for {@code onlyLongestMatch}. All work is delegated to the
 * eight-argument constructor.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param dictionary the dictionary forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary) {
  this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
      DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter from a full set of explicit arguments. Everything
 * except {@code hyphenator} is passed to the superclass constructor;
 * {@code hyphenator} is stored on this instance.
 *
 * @param matchVersion the version value passed to the superclass
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree kept by this filter
 * @param dictionary the dictionary passed to the superclass
 * @param minWordSize the minimum word size passed to the superclass
 * @param minSubwordSize the minimum subword size passed to the superclass
 * @param maxSubwordSize the maximum subword size passed to the superclass
 * @param onlyLongestMatch the longest-match flag passed to the superclass
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet,int,int,int,boolean)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter without a dictionary: delegates to the eight-argument
 * constructor with a {@code null} dictionary and {@code false} for
 * {@code onlyLongestMatch}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,int,int,int)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
    int maxSubwordSize) {
  this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with only a hyphenation tree: delegates to the
 * six-argument constructor using {@code DEFAULT_MIN_WORD_SIZE},
 * {@code DEFAULT_MIN_SUBWORD_SIZE} and {@code DEFAULT_MAX_SUBWORD_SIZE}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator) {
  this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
      DEFAULT_MAX_SUBWORD_SIZE);
}
项目:search    文件:TestRandomChains.java   
/**
 * Produces a hyphenation tree loaded from the {@code da_UTF8.xml} resource
 * resolved against {@code TestCompoundWordTokenFilter}. The {@code random}
 * argument is not consulted. Any exception is handed to
 * {@code Rethrow.rethrow}.
 */
@Override public Object create(Random random) {
  // TODO: make nastier
  try {
    final String patternsUrl =
        TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm();
    return Lucene43HyphenationCompoundWordTokenFilter.getHyphenationTree(
        new InputSource(patternsUrl));
  } catch (Exception ex) {
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
项目:NYBC    文件:TestRandomChains.java   
/**
 * Produces a hyphenation tree loaded from the {@code da_UTF8.xml} resource
 * resolved against {@code TestCompoundWordTokenFilter}. The {@code random}
 * argument is not consulted. Any exception is handed to
 * {@code Rethrow.rethrow}.
 */
@Override public Object create(Random random) {
  // TODO: make nastier
  try {
    final String patternsUrl =
        TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm();
    return HyphenationCompoundWordTokenFilter.getHyphenationTree(
        new InputSource(patternsUrl));
  } catch (Exception ex) {
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
项目:read-open-source-code    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with the given dictionary, using
 * {@code DEFAULT_MIN_WORD_SIZE}, {@code DEFAULT_MIN_SUBWORD_SIZE} and
 * {@code DEFAULT_MAX_SUBWORD_SIZE} for the size limits and {@code false}
 * for {@code onlyLongestMatch}. All work is delegated to the
 * eight-argument constructor.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param dictionary the dictionary forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary) {
  this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
      DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false);
}
项目:read-open-source-code    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter from a full set of explicit arguments. Everything
 * except {@code hyphenator} is passed to the superclass constructor;
 * {@code hyphenator} is stored on this instance.
 *
 * @param matchVersion the version value passed to the superclass
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree kept by this filter
 * @param dictionary the dictionary passed to the superclass
 * @param minWordSize the minimum word size passed to the superclass
 * @param minSubwordSize the minimum subword size passed to the superclass
 * @param maxSubwordSize the maximum subword size passed to the superclass
 * @param onlyLongestMatch the longest-match flag passed to the superclass
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,CharArraySet,int,int,int,boolean)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:read-open-source-code    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter without a dictionary: delegates to the eight-argument
 * constructor with a {@code null} dictionary and {@code false} for
 * {@code onlyLongestMatch}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree,int,int,int)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
    int maxSubwordSize) {
  this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:read-open-source-code    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a filter with only a hyphenation tree: delegates to the
 * six-argument constructor using {@code DEFAULT_MIN_WORD_SIZE},
 * {@code DEFAULT_MIN_SUBWORD_SIZE} and {@code DEFAULT_MAX_SUBWORD_SIZE}.
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @deprecated Use {@link #HyphenationCompoundWordTokenFilter(TokenStream,HyphenationTree)}
 */
@Deprecated
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator) {
  this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
      DEFAULT_MAX_SUBWORD_SIZE);
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestRandomChains.java   
/**
 * Produces a hyphenation tree loaded from the {@code da_UTF8.xml} resource
 * resolved against {@code TestCompoundWordTokenFilter}. The {@code random}
 * argument is not consulted. Any exception is handed to
 * {@code Rethrow.rethrow}.
 */
@Override public Object create(Random random) {
  // TODO: make nastier
  try {
    final String patternsUrl =
        TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm();
    return HyphenationCompoundWordTokenFilter.getHyphenationTree(
        new InputSource(patternsUrl));
  } catch (Exception ex) {
    Rethrow.rethrow(ex);
    return null; // unreachable code
  }
}
项目:search    文件:TestCompoundWordTokenFilter.java   
/**
 * Exercises the filter without a dictionary, where every hyphenation
 * point can yield a subword and many of them are nonsense. The same input,
 * "basketballkurv", is run with three min/max subword size pairs to show
 * how those limits change the emitted terms.
 */
public void testHyphenationOnly() throws Exception {
  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  // Row i of subwordBounds is { min, max }; row i of expectedTerms is its expected output.
  final int[][] subwordBounds = {
      { 2, 4 },
      { 4, 6 },
      { 4, 10 },
  };
  final String[][] expectedTerms = {
      { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" },
      { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" },
      { "basketballkurv", "basket", "basketbal", "basketball", "sket",
        "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" },
  };

  for (int i = 0; i < subwordBounds.length; i++) {
    final MockTokenizer tokenizer = new MockTokenizer(
        new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
    final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        tokenizer,
        hyphenator,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        subwordBounds[i][0],
        subwordBounds[i][1]);
    assertTokenStreamContents(tf, expectedTerms[i]);
  }
}
项目:NYBC    文件:TestCompoundWordTokenFilter.java   
/**
 * Exercises the filter without a dictionary, where every hyphenation
 * point can yield a subword and many of them are nonsense. The same input,
 * "basketballkurv", is run with three min/max subword size pairs to show
 * how those limits change the emitted terms.
 */
public void testHyphenationOnly() throws Exception {
  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  // Row i of subwordBounds is { min, max }; row i of expectedTerms is its expected output.
  final int[][] subwordBounds = {
      { 2, 4 },
      { 4, 6 },
      { 4, 10 },
  };
  final String[][] expectedTerms = {
      { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" },
      { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" },
      { "basketballkurv", "basket", "basketbal", "basketball", "sket",
        "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" },
  };

  for (int i = 0; i < subwordBounds.length; i++) {
    final MockTokenizer tokenizer = new MockTokenizer(
        new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
    final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        tokenizer,
        hyphenator,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        subwordBounds[i][0],
        subwordBounds[i][1]);
    assertTokenStreamContents(tf, expectedTerms[i]);
  }
}
项目:Maskana-Gestor-de-Conocimiento    文件:TestCompoundWordTokenFilter.java   
/**
 * Exercises the filter without a dictionary, where every hyphenation
 * point can yield a subword and many of them are nonsense. The same input,
 * "basketballkurv", is run with three min/max subword size pairs to show
 * how those limits change the emitted terms.
 */
public void testHyphenationOnly() throws Exception {
  final String patternsUrl = getClass().getResource("da_UTF8.xml").toExternalForm();
  final HyphenationTree hyphenator =
      HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(patternsUrl));

  // Row i of subwordBounds is { min, max }; row i of expectedTerms is its expected output.
  final int[][] subwordBounds = {
      { 2, 4 },
      { 4, 6 },
      { 4, 10 },
  };
  final String[][] expectedTerms = {
      { "basketballkurv", "ba", "sket", "bal", "ball", "kurv" },
      { "basketballkurv", "basket", "sket", "ball", "lkurv", "kurv" },
      { "basketballkurv", "basket", "basketbal", "basketball", "sket",
        "sketbal", "sketball", "ball", "ballkurv", "lkurv", "kurv" },
  };

  for (int i = 0; i < subwordBounds.length; i++) {
    final MockTokenizer tokenizer = new MockTokenizer(
        new StringReader("basketballkurv"), MockTokenizer.WHITESPACE, false);
    final HyphenationCompoundWordTokenFilter tf = new HyphenationCompoundWordTokenFilter(
        TEST_VERSION_CURRENT,
        tokenizer,
        hyphenator,
        CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
        subwordBounds[i][0],
        subwordBounds[i][1]);
    assertTokenStreamContents(tf, expectedTerms[i]);
  }
}
项目:lams    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link Lucene43HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
                                                  HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
                                                  int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:lams    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Create a Lucene43HyphenationCompoundWordTokenFilter with no dictionary.
 * <p>
 * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
 * Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator,
 * null, minWordSize, minSubwordSize, maxSubwordSize, false)}
 *
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 */
public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
                                                  HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
                                                  int maxSubwordSize) {
  this(input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:lams    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Builds a {@link HyphenationTree} and loads its patterns from the given
 * source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return the newly created tree, after {@code loadPatterns} has been called on it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public HyphenationCompoundWordTokenFilter(TokenStream input,
                                          HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
                                          int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Create a HyphenationCompoundWordTokenFilter with no dictionary.
 * <p>
 * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
 * HyphenationCompoundWordTokenFilter(input, hyphenator,
 * null, minWordSize, minSubwordSize, maxSubwordSize, false)}
 *
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 */
public HyphenationCompoundWordTokenFilter(TokenStream input,
                                          HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
                                          int maxSubwordSize) {
  this(input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:lams    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Builds a {@link HyphenationTree} and loads its patterns from the given
 * source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return the newly created tree, after {@code loadPatterns} has been called on it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
项目:search    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link Lucene43HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
                                                  HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
                                                  int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:search    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Create a Lucene43HyphenationCompoundWordTokenFilter with no dictionary.
 * <p>
 * Calls {@link #Lucene43HyphenationCompoundWordTokenFilter(TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
 * Lucene43HyphenationCompoundWordTokenFilter(input, hyphenator,
 * null, minWordSize, minSubwordSize, maxSubwordSize, false)}
 *
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 */
public Lucene43HyphenationCompoundWordTokenFilter(TokenStream input,
                                                  HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
                                                  int maxSubwordSize) {
  this(input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:search    文件:Lucene43HyphenationCompoundWordTokenFilter.java   
/**
 * Builds a {@link HyphenationTree} and loads its patterns from the given
 * source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return the newly created tree, after {@code loadPatterns} has been called on it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public HyphenationCompoundWordTokenFilter(TokenStream input,
                                          HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
                                          int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Create a HyphenationCompoundWordTokenFilter with no dictionary.
 * <p>
 * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
 * HyphenationCompoundWordTokenFilter(input, hyphenator,
 * null, minWordSize, minSubwordSize, maxSubwordSize, false)}
 *
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 */
public HyphenationCompoundWordTokenFilter(TokenStream input,
                                          HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
                                          int maxSubwordSize) {
  this(input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:search    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Builds a {@link HyphenationTree} and loads its patterns from the given
 * source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return the newly created tree, after {@code loadPatterns} has been called on it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
项目:NYBC    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param matchVersion
 *          Lucene version to enable correct Unicode 4.0 behavior in the
 *          dictionaries if Version &gt; 3.0. See <a
 *          href="CompoundWordTokenFilterBase.html#version"
 *          >CompoundWordTokenFilterBase</a> for details.
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}
项目:NYBC    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Create a HyphenationCompoundWordTokenFilter with no dictionary.
 * <p>
 * Calls {@link #HyphenationCompoundWordTokenFilter(Version, TokenStream, HyphenationTree, CharArraySet, int, int, int, boolean)
 * HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
 * null, minWordSize, minSubwordSize, maxSubwordSize, false)}
 *
 * @param matchVersion the version value forwarded unchanged
 * @param input the input {@link TokenStream}
 * @param hyphenator the hyphenation tree forwarded unchanged
 * @param minWordSize the minimum word size forwarded unchanged
 * @param minSubwordSize the minimum subword size forwarded unchanged
 * @param maxSubwordSize the maximum subword size forwarded unchanged
 */
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
    int maxSubwordSize) {
  this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
      maxSubwordSize, false);
}
项目:NYBC    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Builds a {@link HyphenationTree} and loads its patterns from the given
 * source.
 *
 * @param hyphenationSource the InputSource pointing to the XML grammar
 * @return the newly created tree, after {@code loadPatterns} has been called on it
 * @throws IOException If there is a low-level I/O error.
 */
public static HyphenationTree getHyphenationTree(InputSource hyphenationSource) throws IOException {
  final HyphenationTree patterns = new HyphenationTree();
  patterns.loadPatterns(hyphenationSource);
  return patterns;
}
项目:read-open-source-code    文件:HyphenationCompoundWordTokenFilter.java   
/**
 * Creates a new {@link HyphenationCompoundWordTokenFilter} instance.
 * <p>
 * Every argument except {@code hyphenator} is handed to the superclass
 * constructor; {@code hyphenator} is stored on this instance.
 *
 * @param matchVersion
 *          Lucene version to enable correct Unicode 4.0 behavior in the
 *          dictionaries if Version &gt; 3.0. See <a
 *          href="CompoundWordTokenFilterBase.html#version"
 *          >CompoundWordTokenFilterBase</a> for details.
 * @param input
 *          the {@link TokenStream} to process
 * @param hyphenator
 *          the hyphenation pattern tree to use for hyphenation
 * @param dictionary
 *          the word dictionary to match against.
 * @param minWordSize
 *          only words longer than this get processed
 * @param minSubwordSize
 *          only subwords longer than this get to the output stream
 * @param maxSubwordSize
 *          only subwords shorter than this get to the output stream
 * @param onlyLongestMatch
 *          Add only the longest matching subword to the stream
 */
public HyphenationCompoundWordTokenFilter(Version matchVersion, TokenStream input,
    HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize,
    int minSubwordSize, int maxSubwordSize, boolean onlyLongestMatch) {
  super(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
      onlyLongestMatch);

  this.hyphenator = hyphenator;
}