Java 类org.apache.lucene.util.OfflineSorter 实例源码

项目:search    文件:ExternalRefSorter.java   
/**
 * Returns an iterator over the buffered sequences, read back from a sorted
 * temporary file.
 *
 * <p>The first successful call closes the writer, sorts the raw input file into a
 * new temporary file and deletes the raw input. Later calls reuse the sorted file.
 *
 * @return an iterator over the contents of the sorted temporary file
 * @throws IOException if the temporary file cannot be created, sorted or opened
 */
@Override
public BytesRefIterator iterator() throws IOException {
  if (sorted == null) {
    closeWriter();

    // Keep the output file in a local until the sort has succeeded. Publishing it to
    // the field up front would make a later call skip the sort and iterate over a
    // partially written file.
    // NOTE(review): a retry after a failed sort calls closeWriter() again - confirm
    // that it tolerates being invoked more than once.
    File sortedFile = File.createTempFile("RefSorter-", ".sorted",
        OfflineSorter.defaultTempDir());
    boolean success = false;
    try {
      sort.sort(input, sortedFile);
      success = true;
    } finally {
      if (!success) {
        // Best effort: do not leave a half-written temporary file behind.
        sortedFile.delete();
      }
    }
    sorted = sortedFile;

    input.delete();
    input = null;
  }

  return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorted),
      sort.getComparator());
}
项目:read-open-source-code    文件:ExternalRefSorter.java   
/**
 * Returns an iterator over the buffered sequences, read back from a sorted
 * temporary file.
 *
 * <p>The first successful call closes the writer, sorts the raw input file into a
 * new temporary file and deletes the raw input. Later calls reuse the sorted file.
 *
 * @return an iterator over the contents of the sorted temporary file
 * @throws IOException if the temporary file cannot be created, sorted or opened
 */
@Override
public BytesRefIterator iterator() throws IOException {
  if (sorted == null) {
    closeWriter();

    // Only publish the output file to the field once the sort has completed;
    // otherwise a failed sort would leave the field set and the next call would
    // return an iterator over incomplete data instead of sorting again.
    // NOTE(review): a retry after a failed sort calls closeWriter() again - confirm
    // that it tolerates being invoked more than once.
    File sortedFile = File.createTempFile("RefSorter-", ".sorted",
        OfflineSorter.defaultTempDir());
    boolean success = false;
    try {
      sort.sort(input, sortedFile);
      success = true;
    } finally {
      if (!success) {
        // Best effort: remove the unusable output file.
        sortedFile.delete();
      }
    }
    sorted = sortedFile;

    input.delete();
    input = null;
  }

  return new ByteSequenceIterator(new OfflineSorter.ByteSequencesReader(sorted),
      sort.getComparator());
}
项目:search    文件:ExternalRefSorter.java   
/**
 * Will buffer all sequences to a temporary file and then sort (all on-disk).
 */
public ExternalRefSorter(OfflineSorter sort) throws IOException {
  this.sort = sort;
  this.input = File.createTempFile("RefSorter-", ".raw",
      OfflineSorter.defaultTempDir());
  this.writer = new OfflineSorter.ByteSequencesWriter(input);
}
项目:search    文件:SortedTermFreqIteratorWrapper.java   
/**
 * Encodes every entry of {@code source} together with its weight into a temporary
 * file, sorts that file offline with {@code tieBreakByCostComparator} and returns a
 * reader over the sorted result.
 *
 * <p>If any step fails, the writer is closed quietly and {@code close()} is invoked
 * before the exception propagates.
 */
private ByteSequencesReader sort() throws IOException {
  final String tempPrefix = getClass().getSimpleName();
  final File tempLocation = OfflineSorter.defaultTempDir();
  tempInput = File.createTempFile(tempPrefix, ".input", tempLocation);
  tempSorted = File.createTempFile(tempPrefix, ".sorted", tempLocation);

  final ByteSequencesWriter writer = new ByteSequencesWriter(tempInput);
  boolean success = false;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

    for (BytesRef term = source.next(); term != null; term = source.next()) {
      encode(writer, output, buffer, term, source.weight());
    }
    // The writer is closed before sorting starts on the file it wrote.
    writer.close();

    OfflineSorter sorter = new OfflineSorter(tieBreakByCostComparator);
    sorter.sort(tempInput, tempSorted);

    ByteSequencesReader sortedReader = new ByteSequencesReader(tempSorted);
    success = true;
    return sortedReader;

  } finally {
    if (!success) {
      try {
        IOUtils.closeWhileHandlingException(writer);
      } finally {
        close();
      }
    } else {
      IOUtils.close(writer);
    }
  }
}
项目:search    文件:SortedInputIterator.java   
/**
 * Encodes every entry of {@code source} (with its payload, contexts and weight) into
 * a temporary file, sorts that file offline with {@code tieBreakByCostComparator}
 * and returns a reader over the sorted result.
 *
 * <p>If any step fails, the writer is closed quietly and {@code close()} is invoked
 * before the exception propagates.
 */
private ByteSequencesReader sort() throws IOException {
  final String tempPrefix = getClass().getSimpleName();
  final File tempLocation = OfflineSorter.defaultTempDir();
  tempInput = File.createTempFile(tempPrefix, ".input", tempLocation);
  tempSorted = File.createTempFile(tempPrefix, ".sorted", tempLocation);

  final OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  boolean success = false;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

    for (BytesRef term = source.next(); term != null; term = source.next()) {
      encode(writer, output, buffer, term, source.payload(), source.contexts(), source.weight());
    }
    // The writer is closed before sorting starts on the file it wrote.
    writer.close();

    OfflineSorter sorter = new OfflineSorter(tieBreakByCostComparator);
    sorter.sort(tempInput, tempSorted);

    ByteSequencesReader sortedReader = new OfflineSorter.ByteSequencesReader(tempSorted);
    success = true;
    return sortedReader;

  } finally {
    if (!success) {
      try {
        IOUtils.closeWhileHandlingException(writer);
      } finally {
        close();
      }
    } else {
      IOUtils.close(writer);
    }
  }
}
项目:search    文件:LargeInputFST.java   
/**
 * Reads a dictionary file line by line, feeds every line into an
 * {@link FSTCompletionBuilder} backed by an on-disk sorter, builds the completion
 * automaton and saves it to {@code completion.fst}.
 *
 * @param args optional; when present, {@code args[0]} is the path of the dictionary
 *     file to read, otherwise the built-in default path is used
 * @throws IOException if reading the input, sorting or saving the automaton fails
 */
public static void main(String[] args) throws IOException {
  String inputPath = (args != null && args.length > 0)
      ? args[0]
      : "/home/dweiss/tmp/shuffled.dict";
  File input = new File(inputPath);

  int buckets = 20;
  int shareMaxTail = 10;

  ExternalRefSorter sorter = new ExternalRefSorter(new OfflineSorter());
  try {
    FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, sorter, shareMaxTail);

    BytesRefBuilder scratch = new BytesRefBuilder();
    int count = 0;
    // try-with-resources: the input stream is released even if reading or adding fails.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(
            new FileInputStream(input), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        scratch.copyChars(line);
        // Entries are spread over the buckets round-robin by line index.
        builder.add(scratch.get(), count % buckets);
        if ((count++ % 100000) == 0) {
          System.err.println("Line: " + count);
        }
      }
    }

    System.out.println("Building FSTCompletion.");
    FSTCompletion completion = builder.build();

    File fstFile = new File("completion.fst");
    System.out.println("Done. Writing automaton: " + fstFile.getAbsolutePath());
    completion.getFST().save(fstFile);
  } finally {
    // Close the sorter on failure paths too, not only after a successful save.
    sorter.close();
  }
}
项目:search    文件:TestOfflineSorter.java   
/**
 * Sorts {@code data} with the given {@link OfflineSorter} and verifies that the
 * resulting file is identical to a reference file written from the same data after
 * an in-memory sort.
 *
 * <p>Note that {@code data} is reordered in place by this method.
 *
 * @return the {@link SortInfo} reported by the offline sort
 */
private SortInfo checkSort(OfflineSorter sort, byte[][] data) throws IOException {
  // Write the input in its original order before the in-memory sort reorders it.
  final File unsortedFile = writeAll("unsorted", data);

  Arrays.sort(data, unsignedByteOrderComparator);
  final File expectedFile = writeAll("golden", data);

  final File actualFile = new File(tempDir, "sorted");
  final SortInfo info = sort.sort(unsortedFile, actualFile);

  assertFilesIdentical(expectedFile, actualFile);
  return info;
}
项目:search    文件:TestOfflineSorter.java   
/**
 * Writes every element of {@code data} as one byte sequence to a file called
 * {@code name} inside {@code tempDir}.
 *
 * @param name file name, resolved against {@code tempDir}
 * @param data the sequences to write, in the given order
 * @return the file that was written
 * @throws IOException if writing or closing the file fails
 */
private File writeAll(String name, byte[][] data) throws IOException {
  File file = new File(tempDir, name);
  ByteSequencesWriter w = new OfflineSorter.ByteSequencesWriter(file);
  try {
    for (byte [] datum : data) {
      w.write(datum);
    }
  } finally {
    // Close the writer even when a write fails so it is not leaked.
    w.close();
  }
  return file;
}
项目:read-open-source-code    文件:ExternalRefSorter.java   
/**
 * Will buffer all sequences to a temporary file and then sort (all on-disk).
 */
public ExternalRefSorter(OfflineSorter sort) throws IOException {
  this.sort = sort;
  this.input = File.createTempFile("RefSorter-", ".raw",
      OfflineSorter.defaultTempDir());
  this.writer = new OfflineSorter.ByteSequencesWriter(input);
}
项目:read-open-source-code    文件:SortedTermFreqIteratorWrapper.java   
/**
 * Writes each entry of {@code source} with its weight to a temporary input file,
 * runs an offline sort using {@code tieBreakByCostComparator} and opens a reader on
 * the sorted output.
 *
 * <p>On failure the writer is closed while suppressing its exceptions and
 * {@code close()} is called before the original exception propagates.
 */
private ByteSequencesReader sort() throws IOException {
  final String namePrefix = getClass().getSimpleName();
  final File baseDir = OfflineSorter.defaultTempDir();
  tempInput = File.createTempFile(namePrefix, ".input", baseDir);
  tempSorted = File.createTempFile(namePrefix, ".sorted", baseDir);

  final ByteSequencesWriter writer = new ByteSequencesWriter(tempInput);
  boolean success = false;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

    for (BytesRef entry = source.next(); entry != null; entry = source.next()) {
      encode(writer, output, buffer, entry, source.weight());
    }
    // Close the writer before the sorter is run on the file it wrote.
    writer.close();

    OfflineSorter offlineSorter = new OfflineSorter(tieBreakByCostComparator);
    offlineSorter.sort(tempInput, tempSorted);

    ByteSequencesReader result = new ByteSequencesReader(tempSorted);
    success = true;
    return result;

  } finally {
    if (!success) {
      try {
        IOUtils.closeWhileHandlingException(writer);
      } finally {
        close();
      }
    } else {
      IOUtils.close(writer);
    }
  }
}
项目:read-open-source-code    文件:SortedInputIterator.java   
/**
 * Writes each entry of {@code source} (payload, contexts and weight included) to a
 * temporary input file, runs an offline sort using {@code tieBreakByCostComparator}
 * and opens a reader on the sorted output.
 *
 * <p>On failure the writer is closed while suppressing its exceptions and
 * {@code close()} is called before the original exception propagates.
 */
private ByteSequencesReader sort() throws IOException {
  final String namePrefix = getClass().getSimpleName();
  final File baseDir = OfflineSorter.defaultTempDir();
  tempInput = File.createTempFile(namePrefix, ".input", baseDir);
  tempSorted = File.createTempFile(namePrefix, ".sorted", baseDir);

  final OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  boolean success = false;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);

    for (BytesRef entry = source.next(); entry != null; entry = source.next()) {
      encode(writer, output, buffer, entry, source.payload(), source.contexts(), source.weight());
    }
    // Close the writer before the sorter is run on the file it wrote.
    writer.close();

    OfflineSorter offlineSorter = new OfflineSorter(tieBreakByCostComparator);
    offlineSorter.sort(tempInput, tempSorted);

    ByteSequencesReader result = new OfflineSorter.ByteSequencesReader(tempSorted);
    success = true;
    return result;

  } finally {
    if (!success) {
      try {
        IOUtils.closeWhileHandlingException(writer);
      } finally {
        close();
      }
    } else {
      IOUtils.close(writer);
    }
  }
}
项目:search    文件:BytesRefSortersTest.java   
/**
 * Runs the shared {@code check} routine against an {@link ExternalRefSorter} backed
 * by a default {@link OfflineSorter}.
 */
@Test
public void testExternalRefSorter() throws Exception {
  ExternalRefSorter s = new ExternalRefSorter(new OfflineSorter());
  try {
    check(s);
  } finally {
    // Close the sorter even when the check fails so a failing test does not leak it.
    s.close();
  }
}
项目:search    文件:TestOfflineSorter.java   
/** Runs the sort check on an input that contains no sequences at all. */
public void testEmpty() throws Exception {
  final OfflineSorter sorter = new OfflineSorter();
  final byte[][] noSequences = new byte[0][];
  checkSort(sorter, noSequences);
}
项目:search    文件:TestOfflineSorter.java   
/** Runs the sort check on an input that consists of exactly one sequence. */
public void testSingleLine() throws Exception {
  final OfflineSorter sorter = new OfflineSorter();
  final byte[] onlyLine = "Single line only.".getBytes(StandardCharsets.UTF_8);
  checkSort(sorter, new byte[][] { onlyLine });
}
项目:search    文件:TestOfflineSorter.java   
/**
 * Sorts 20 MB worth of random data with a 1 MB buffer and binary merging, and
 * expects more than ten merge rounds to be reported.
 */
public void testIntermediateMerges() throws Exception {
  final OfflineSorter sorter = new OfflineSorter(
      OfflineSorter.DEFAULT_COMPARATOR,
      BufferSize.megabytes(1),
      OfflineSorter.defaultTempDir(),
      2);
  final int totalBytes = (int) OfflineSorter.MB * 20;

  final SortInfo info = checkSort(sorter, generateRandom(totalBytes));
  assertTrue(info.mergeRounds > 10);
}
项目:search    文件:TestOfflineSorter.java   
/**
 * Sorts 20 MB worth of random data with a 1 MB buffer and
 * {@code OfflineSorter.MAX_TEMPFILES} as the temp-file setting, and expects exactly
 * one merge round to be reported.
 */
public void testSmallRandom() throws Exception {
  final OfflineSorter sorter = new OfflineSorter(
      OfflineSorter.DEFAULT_COMPARATOR,
      BufferSize.megabytes(1),
      OfflineSorter.defaultTempDir(),
      OfflineSorter.MAX_TEMPFILES);
  final int totalBytes = (int) OfflineSorter.MB * 20;

  final SortInfo sortInfo = checkSort(sorter, generateRandom(totalBytes));
  assertEquals(1, sortInfo.mergeRounds);
}
项目:search    文件:TestOfflineSorter.java   
/**
 * Sorts 100 MB worth of random data with a 16 MB buffer. Only the file comparison
 * done by {@code checkSort} is verified; the returned sort info is not inspected.
 */
@Nightly
public void testLargerRandom() throws Exception {
  final OfflineSorter sorter = new OfflineSorter(
      OfflineSorter.DEFAULT_COMPARATOR,
      BufferSize.megabytes(16),
      OfflineSorter.defaultTempDir(),
      OfflineSorter.MAX_TEMPFILES);
  final int totalBytes = (int) OfflineSorter.MB * 100;

  checkSort(sorter, generateRandom(totalBytes));
}