Instance source-code examples for the Java class org.apache.lucene.analysis.standard.ClassicAnalyzer
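
The snippets below are gathered from the test suites of several open-source projects. For orientation, here is a minimal standalone sketch of driving the analyzer directly (assuming Lucene 4.10+, where the no-argument constructor is available; the class name ClassicAnalyzerDemo and the sample text are illustrative, not taken from any of the projects below):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ClassicAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    // ClassicAnalyzer preserves the pre-3.1 StandardAnalyzer behavior; notably,
    // it keeps host names such as "www.nutch.org" together as one <HOST> token.
    ClassicAnalyzer analyzer = new ClassicAnalyzer();
    TokenStream ts = analyzer.tokenStream("content", "visit www.nutch.org today");
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                              // required before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(term.toString());   // visit, www.nutch.org, today
    }
    ts.end();
    ts.close();
    analyzer.close();
  }
}

The tests below exercise exactly this surface: the token-length limit (setMaxTokenLength), <HOST> tokenization, and how over-long terms behave during indexing.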

Project: search    File: TestClassicAnalyzer.java
public void testMaxTermLength2() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer();
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
  sa.setMaxTokenLength(5);

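  // "toolong" (7 chars) now exceeds the 5-char limit and is dropped; the
  // position increment of 2 on "xy" accounts for the skipped token.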
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
Project: search    File: TestClassicAnalyzer.java
public void testLucene1140() throws Exception {
  try {
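    // Regression test: a <HOST> token followed by a trailing period must not trigger an NPE.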
    ClassicAnalyzer analyzer = new ClassicAnalyzer();
    assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  } catch (NullPointerException e) {
    fail("Should not throw an NPE and it did");
  }

}
Project: NYBC    File: TestClassicAnalyzer.java
public void testMaxTermLength2() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
  sa.setMaxTokenLength(5);

  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
Project: NYBC    File: TestClassicAnalyzer.java
public void testLucene1140() throws Exception {
  try {
    ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  } catch (NullPointerException e) {
    fail("Should not throw an NPE and it did");
  }

}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
public void testMaxTermLength2() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
  sa.setMaxTokenLength(5);

  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
public void testLucene1140() throws Exception {
  try {
    ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  } catch (NullPointerException e) {
    fail("Should not throw an NPE and it did");
  }

}
Project: search    File: TestClassicAnalyzer.java
public void testMaxTermLength() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer();
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
Project: search    File: TestClassicAnalyzer.java
public void testJava14BWCompatibility() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_3_0);
  assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
Project: search    File: TestClassicAnalyzer.java
/**
 * Make sure we skip wicked long terms.
 */
public void testWickedLongTerm() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer()));

  char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
  Arrays.fill(chars, 'x');
  Document doc = new Document();
  final String bigTerm = new String(chars);

  // This produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  doc.add(new TextField("content", contents, Field.Store.NO));
  writer.addDocument(doc);

  // Make sure we can add another normal document
  doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = IndexReader.open(dir);

  // Make sure all terms < max size were indexed
  assertEquals(2, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(1, reader.docFreq(new Term("content", "term")));
  assertEquals(1, reader.docFreq(new Term("content", "another")));

  // Make sure position is still incremented when
  // massive term is skipped:
  DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                              MultiFields.getLiveDocs(reader),
                                                              "content",
                                                              new BytesRef("another"));
  assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, tps.freq());
  assertEquals(3, tps.nextPosition());

  // Make sure the doc that has the massive term is in
  // the index:
  assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

  reader.close();

  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  doc.add(new TextField("content", bigTerm, Field.Store.NO));
  ClassicAnalyzer sa = new ClassicAnalyzer();
  sa.setMaxTokenLength(100000);
  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
  writer.addDocument(doc);
  writer.close();
  reader = IndexReader.open(dir);
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();

  dir.close();
}
Project: search    File: TestClassicAnalyzer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  checkRandomData(random(), new ClassicAnalyzer(), 1000*RANDOM_MULTIPLIER);
}
Project: search    File: TestClassicAnalyzer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  checkRandomData(random, new ClassicAnalyzer(), 100*RANDOM_MULTIPLIER, 8192);
}
Project: stratio-cassandra    File: PreBuiltAnalyzersTest.java
@Test
public void testGetClassic() {
    Analyzer analyzer = PreBuiltAnalyzers.CLASSIC.get();
    Assert.assertEquals(ClassicAnalyzer.class, analyzer.getClass());
}
Project: NYBC    File: TestClassicAnalyzer.java
public void testMaxTermLength() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
Project: NYBC    File: TestClassicAnalyzer.java
public void testJava14BWCompatibility() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
  assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
Project: NYBC    File: TestClassicAnalyzer.java
/**
 * Make sure we skip wicked long terms.
 */
public void testWickedLongTerm() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
    TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

  char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
  Arrays.fill(chars, 'x');
  Document doc = new Document();
  final String bigTerm = new String(chars);

  // This produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  doc.add(new TextField("content", contents, Field.Store.NO));
  writer.addDocument(doc);

  // Make sure we can add another normal document
  doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = IndexReader.open(dir);

  // Make sure all terms < max size were indexed
  assertEquals(2, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(1, reader.docFreq(new Term("content", "term")));
  assertEquals(1, reader.docFreq(new Term("content", "another")));

  // Make sure position is still incremented when
  // massive term is skipped:
  DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                              MultiFields.getLiveDocs(reader),
                                                              "content",
                                                              new BytesRef("another"));
  assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, tps.freq());
  assertEquals(3, tps.nextPosition());

  // Make sure the doc that has the massive term is in
  // the index:
  assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

  reader.close();

  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  doc.add(new TextField("content", bigTerm, Field.Store.NO));
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(100000);
  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
  writer.addDocument(doc);
  writer.close();
  reader = IndexReader.open(dir);
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();

  dir.close();
}
Project: NYBC    File: TestClassicAnalyzer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
}
Project: NYBC    File: TestClassicAnalyzer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
public void testMaxTermLength() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(5);
  assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
public void testJava14BWCompatibility() throws Exception {
  ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
  assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
/**
 * Make sure we skip wicked long terms.
 */
public void testWickedLongTerm() throws IOException {
  RAMDirectory dir = new RAMDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
    TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

  char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
  Arrays.fill(chars, 'x');
  Document doc = new Document();
  final String bigTerm = new String(chars);

  // This produces a too-long term:
  String contents = "abc xyz x" + bigTerm + " another term";
  doc.add(new TextField("content", contents, Field.Store.NO));
  writer.addDocument(doc);

  // Make sure we can add another normal document
  doc = new Document();
  doc.add(new TextField("content", "abc bbb ccc", Field.Store.NO));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = IndexReader.open(dir);

  // Make sure all terms < max size were indexed
  assertEquals(2, reader.docFreq(new Term("content", "abc")));
  assertEquals(1, reader.docFreq(new Term("content", "bbb")));
  assertEquals(1, reader.docFreq(new Term("content", "term")));
  assertEquals(1, reader.docFreq(new Term("content", "another")));

  // Make sure position is still incremented when
  // massive term is skipped:
  DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                              MultiFields.getLiveDocs(reader),
                                                              "content",
                                                              new BytesRef("another"));
  assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, tps.freq());
  assertEquals(3, tps.nextPosition());

  // Make sure the doc that has the massive term is in
  // the index:
  assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());

  reader.close();

  // Make sure we can add a document with exactly the
  // maximum length term, and search on that term:
  doc = new Document();
  doc.add(new TextField("content", bigTerm, Field.Store.NO));
  ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
  sa.setMaxTokenLength(100000);
  writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
  writer.addDocument(doc);
  writer.close();
  reader = IndexReader.open(dir);
  assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
  reader.close();

  dir.close();
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  checkRandomData(random(), new ClassicAnalyzer(TEST_VERSION_CURRENT), 1000*RANDOM_MULTIPLIER);
}
Project: Maskana-Gestor-de-Conocimiento    File: TestClassicAnalyzer.java
/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
  Random random = random();
  checkRandomData(random, new ClassicAnalyzer(TEST_VERSION_CURRENT), 100*RANDOM_MULTIPLIER, 8192);
}