Java 类org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2 实例源码

项目:ditb    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:LCIndex-HBase-0.94.16    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:pbase    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:HIndex    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:IRIndex    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:PyroDB    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:c5    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:HBase-Research    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:hbase-0.94.8-qod    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:hbase-0.94.8-qod    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:DominoHBase    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:hindex    文件:TestCacheOnWriteInSchema.java   
/**
 * Fills the given store file writer with {@code NUM_KV} randomly generated cells.
 *
 * <p>Each key buffer produced by {@code TestHFileWriterV2.randomOrderedKey} is carved into
 * three consecutive slices: a fixed 32-byte row, a column family of random length, and a
 * qualifier made of whatever bytes remain. The timestamp is a random long and the key type
 * comes from {@code generateKeyType}.
 *
 * @param writer destination that every generated cell is appended to
 * @throws IOException if appending to the writer fails
 */
private void writeStoreFile(StoreFile.Writer writer) throws IOException {
  final int rowLen = 32;
  int index = 0;
  while (index < NUM_KV) {
    final byte[] keyBuf = TestHFileWriterV2.randomOrderedKey(rand, index);
    final byte[] valueBuf = TestHFileWriterV2.randomValue(rand);
    // The family takes a random share (possibly zero) of the bytes after the row.
    final int familyLen = rand.nextInt(keyBuf.length - rowLen + 1);
    final int qualifierOffset = rowLen + familyLen;
    final int qualifierLen = keyBuf.length - rowLen - familyLen;
    // The timestamp is drawn before generateKeyType(rand) runs, which fixes the
    // order in which rand is consumed.
    final long timestamp = rand.nextLong();
    writer.append(new KeyValue(
        keyBuf, 0, rowLen,
        keyBuf, rowLen, familyLen,
        keyBuf, qualifierOffset, qualifierLen,
        timestamp,
        generateKeyType(rand),
        valueBuf, 0, valueBuf.length));
    ++index;
  }
}
项目:ditb    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:ditb    文件:TestCompoundBloomFilter.java   
/**
 * Asks the scanner whether it should be used for a scan of one row and one column.
 *
 * <p>The scan is built from {@code row} for both of its row arguments and is restricted to
 * {@code qualifier} in the {@code TestHFileWriterV2.COLUMN_FAMILY_NAME} family. The store
 * passed to the scanner is a mock whose column family descriptor reports that same family
 * name.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param qualifier column qualifier to look up
 * @return the result of {@code scanner.shouldUseScanner} for that scan
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row,
    byte[] qualifier) {
  final Scan singleColumnScan = new Scan(row, row);
  singleColumnScan.addColumn(
      Bytes.toBytes(TestHFileWriterV2.COLUMN_FAMILY_NAME), qualifier);

  // Stub just enough of a store for the scanner to resolve the column family.
  final Store mockedStore = mock(Store.class);
  final HColumnDescriptor familyDescriptor = mock(HColumnDescriptor.class);
  when(familyDescriptor.getName())
      .thenReturn(Bytes.toBytes(TestHFileWriterV2.COLUMN_FAMILY_NAME));
  when(mockedStore.getFamily()).thenReturn(familyDescriptor);

  return scanner.shouldUseScanner(singleColumnScan, mockedStore, Long.MIN_VALUE);
}
项目:LCIndex-HBase-0.94.16    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:pbase    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:HIndex    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:IRIndex    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:PyroDB    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:c5    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:HBase-Research    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:DominoHBase    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:hindex    文件:TestCompoundBloomFilter.java   
/**
 * Generates {@code n} random key/values and returns them in sorted order.
 *
 * @param rand source of randomness handed to {@code TestHFileWriterV2.randomKeyValue}
 * @param n number of key/values to generate
 * @return a new list of the generated key/values, ordered by {@code KeyValue.COMPARATOR}
 */
private List<KeyValue> createSortedKeyValues(Random rand, int n) {
  final List<KeyValue> sorted = new ArrayList<KeyValue>(n);
  for (int remaining = n; remaining > 0; --remaining) {
    sorted.add(TestHFileWriterV2.randomKeyValue(rand));
  }
  Collections.sort(sorted, KeyValue.COMPARATOR);
  return sorted;
}
项目:ditb    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:ditb    文件:TestCompoundBloomFilter.java   
/**
 * Checks a row against the Bloom filter using a randomly generated qualifier.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param bt Bloom filter type; not used by this method
 * @param rand source of randomness for the generated qualifier
 * @return the result of the three-argument {@code isInBloom} for the row and that qualifier
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:LCIndex-HBase-0.94.16    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:LCIndex-HBase-0.94.16    文件:TestCompoundBloomFilter.java   
/**
 * Checks a row against the Bloom filter using a randomly generated qualifier.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param bt Bloom filter type; not used by this method
 * @param rand source of randomness for the generated qualifier
 * @return the result of the three-argument {@code isInBloom} for the row and that qualifier
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:pbase    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:pbase    文件:TestCompoundBloomFilter.java   
/**
 * Checks a row against the Bloom filter using a randomly generated qualifier.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param bt Bloom filter type; not used by this method
 * @param rand source of randomness for the generated qualifier
 * @return the result of the three-argument {@code isInBloom} for the row and that qualifier
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:HIndex    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:HIndex    文件:TestCompoundBloomFilter.java   
/**
 * Checks a row against the Bloom filter using a randomly generated qualifier.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param bt Bloom filter type; not used by this method
 * @param rand source of randomness for the generated qualifier
 * @return the result of the three-argument {@code isInBloom} for the row and that qualifier
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:IRIndex    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:IRIndex    文件:TestCompoundBloomFilter.java   
/**
 * Checks a row against the Bloom filter using a randomly generated qualifier.
 *
 * @param scanner store file scanner being queried
 * @param row row key to look up
 * @param bt Bloom filter type; not used by this method
 * @param rand source of randomness for the generated qualifier
 * @return the result of the three-argument {@code isInBloom} for the row and that qualifier
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:PyroDB    文件:TestCompoundBloomFilter.java   
/**
 * Reads back the store file at {@code sfPath} and checks its compound Bloom filter.
 *
 * <p>First, every key/value in {@code kvs} is looked up and must be reported as present; a
 * false negative fails the test. Then {@code NUM_KV[t] * 10} random queries are issued twice,
 * once with the Bloom filter's fake lookup mode enabled and once with it disabled, and the
 * observed false positive rate is passed to {@code validateFalsePosRate} with an upper and
 * then a lower z-value.
 *
 * @param t index into {@code NUM_KV} that determines the number of random queries
 * @param bt Bloom filter type used to open the store file
 * @param kvs key/values that must all be found through the Bloom filter
 * @param sfPath location of the store file to read
 * @throws IOException propagated from opening, reading or closing the store file
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile storeFile = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader reader = storeFile.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner sfScanner = reader.getStoreFileScanner(true, pread);

  // Phase 1: false negatives are never allowed, so every written key must be found.
  int verified = 0;
  for (KeyValue written : kvs) {
    byte[] rowKey = written.getRow();
    boolean found = isInBloom(sfScanner, rowKey, written.getQualifier());
    assertTrue(testIdMsg + " Bloom filter false negative on row "
        + Bytes.toStringBinary(rowKey) + " after " + verified
        + " successful checks", found);
    ++verified;
  }

  // Phase 2: false positives are allowed up to some percentage. Two modes are
  // exercised: "fake lookup", which ignores the key distribution, and production mode.
  final boolean[] lookupModes = { true, false };
  for (int mode = 0; mode < lookupModes.length; ++mode) {
    final boolean fakeLookupEnabled = lookupModes[mode];
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      final String modeState = fakeLookupEnabled ? "enabled" : "disabled";
      final String modeSuffix = ", fake lookup is " + modeState;
      CompoundBloomFilter bloom =
          (CompoundBloomFilter) reader.getGeneralBloomFilter();
      bloom.enableTestingStats();
      int falsePositives = 0;
      Random queryRand = new Random(EVALUATION_SEED);
      int trials = NUM_KV[t] * 10;
      int issued = 0;
      while (issued < trials) {
        byte[] probe = TestHFileWriterV2.randomRowOrQualifier(queryRand);
        if (isInBloom(sfScanner, probe, bt, queryRand)) {
          ++falsePositives;
        }
        ++issued;
      }
      double fpRate = falsePositives * 1.0 / trials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          falsePositives, trials, fpRate) + modeSuffix);

      // Coarse sanity check that catches an obviously broken Bloom filter.
      assertTrue("False positive is too high: " + fpRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + modeSuffix,
          fpRate < TOO_HIGH_ERROR_RATE);

      // Tighter upper-bound check. The real-world mode gets a more relaxed limit
      // than the "fake lookup" mode because our hash functions are not completely
      // independent.
      final double upperZ;
      if (fakeLookupEnabled) {
        upperZ = 1.96;
      } else {
        upperZ = 2.5;
      }
      validateFalsePosRate(fpRate, trials, upperZ, bloom, modeSuffix);

      // The lower-bound check excludes the last chunk, because it is frequently
      // smaller and its false positive rate is too low. This does not help if there
      // is only one under-sized chunk, though.
      final int chunkCount = bloom.getNumChunks();
      if (chunkCount > 1) {
        final int lastChunk = chunkCount - 1;
        falsePositives -= bloom.getNumPositivesForTesting(lastChunk);
        trials -= bloom.getNumQueriesForTesting(lastChunk);
        fpRate = falsePositives * 1.0 / trials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            fpRate + modeSuffix);
      }

      validateFalsePosRate(fpRate, trials, -2.58, bloom, modeSuffix);
    } finally {
      // Always leave the process-wide lookup mode switched off.
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  reader.close(true); // end of test so evictOnClose
}
项目:PyroDB    文件:TestCompoundBloomFilter.java   
/**
 * Convenience overload that probes the Bloom filter for {@code row} paired
 * with a randomly generated qualifier.
 *
 * @param scanner scanner handed through to the three-argument overload
 * @param row row key to look up
 * @param bt Bloom type; not referenced by this overload
 * @param rand source of randomness used to generate the qualifier
 * @return whatever the three-argument {@code isInBloom} overload returns
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  final byte[] randomQualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  return isInBloom(scanner, row, randomQualifier);
}
项目:c5    文件:TestCompoundBloomFilter.java   
/**
 * Opens the store file at {@code sfPath} and verifies its compound Bloom
 * filter: every written key-value must be reported as present (no false
 * negatives), and the false positive rate measured on random queries must
 * pass the upper- and lower-bound checks done by
 * {@code validateFalsePosRate}.
 *
 * <p>The reader is closed (with evict-on-close) in a {@code finally} block so
 * that it is released even when an assertion fails or an I/O error occurs
 * part-way through the checks.
 *
 * @param t index into {@code NUM_KV}; {@code NUM_KV[t] * 10} random queries
 *          are issued per lookup mode
 * @param bt Bloom type the store file is opened with
 * @param kvs key-values whose row/qualifier pairs must be found in the filter
 * @param sfPath path of the store file to read
 * @throws IOException propagated from opening, reading or closing the reader
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  try {
    final boolean pread = true; // does not really matter
    StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

    {
      // Test for false negatives (not allowed).
      int numChecked = 0;
      for (KeyValue kv : kvs) {
        byte[] row = kv.getRow();
        boolean present = isInBloom(scanner, row, kv.getQualifier());
        assertTrue(testIdMsg + " Bloom filter false negative on row "
            + Bytes.toStringBinary(row) + " after " + numChecked
            + " successful checks", present);
        ++numChecked;
      }
    }

    // Test for false positives (some percentage allowed). We test in two
    // modes: "fake lookup" which ignores the key distribution, and production
    // mode.
    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf =
            (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        // Fixed seed so both modes see the same sequence of random queries.
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);

        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.

        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);

        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

        validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
            fakeLookupModeStr);
      } finally {
        // Always restore production lookup mode, even if a check failed.
        ByteBloomFilter.setFakeLookupMode(false);
      }
    }
  } finally {
    // Release the reader on every exit path, not only on success.
    r.close(true); // end of test so evictOnClose
  }
}
项目:c5    文件:TestCompoundBloomFilter.java   
/**
 * Checks the Bloom filter for {@code row} combined with a qualifier drawn
 * from {@code rand}, delegating to the three-argument overload.
 *
 * @param scanner scanner passed on to the three-argument overload
 * @param row row key being queried
 * @param bt Bloom type; this overload does not read it
 * @param rand random generator used to produce the qualifier
 * @return the result of the delegated {@code isInBloom} call
 */
private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,
    Random rand) {
  byte[] qualifier = TestHFileWriterV2.randomRowOrQualifier(rand);
  boolean found = isInBloom(scanner, row, qualifier);
  return found;
}
项目:HBase-Research    文件:TestCompoundBloomFilter.java   
/**
 * Opens the store file at {@code sfPath} (with the no-op data block encoder)
 * and verifies its compound Bloom filter: every written key-value must be
 * reported as present (no false negatives), and the false positive rate
 * measured on random queries must pass the upper- and lower-bound checks
 * done by {@code validateFalsePosRate}.
 *
 * <p>The reader is closed (with evict-on-close) in a {@code finally} block so
 * that it is released even when an assertion fails or an I/O error occurs
 * part-way through the checks.
 *
 * @param t index into {@code NUM_KV}; {@code NUM_KV[t] * 10} random queries
 *          are issued per lookup mode
 * @param bt Bloom type the store file is opened with
 * @param kvs key-values whose row/qualifier pairs must be found in the filter
 * @param sfPath path of the store file to read
 * @throws IOException propagated from opening, reading or closing the reader
 */
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  try {
    final boolean pread = true; // does not really matter
    StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

    {
      // Test for false negatives (not allowed).
      int numChecked = 0;
      for (KeyValue kv : kvs) {
        byte[] row = kv.getRow();
        boolean present = isInBloom(scanner, row, kv.getQualifier());
        assertTrue(testIdMsg + " Bloom filter false negative on row "
            + Bytes.toStringBinary(row) + " after " + numChecked
            + " successful checks", present);
        ++numChecked;
      }
    }

    // Test for false positives (some percentage allowed). We test in two
    // modes: "fake lookup" which ignores the key distribution, and production
    // mode.
    for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
      ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
      try {
        String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
            "enabled" : "disabled");
        CompoundBloomFilter cbf =
            (CompoundBloomFilter) r.getGeneralBloomFilter();
        cbf.enableTestingStats();
        int numFalsePos = 0;
        // Fixed seed so both modes see the same sequence of random queries.
        Random rand = new Random(EVALUATION_SEED);
        int nTrials = NUM_KV[t] * 10;
        for (int i = 0; i < nTrials; ++i) {
          byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
          if (isInBloom(scanner, query, bt, rand)) {
            numFalsePos += 1;
          }
        }
        double falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.debug(String.format(testIdMsg
            + " False positives: %d out of %d (%f)",
            numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

        // Check for obvious Bloom filter crashes.
        assertTrue("False positive is too high: " + falsePosRate + " (greater "
            + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
            falsePosRate < TOO_HIGH_ERROR_RATE);

        // Now a more precise check to see if the false positive rate is not
        // too high. The reason we use a relaxed restriction for the real-world
        // case as opposed to the "fake lookup" case is that our hash functions
        // are not completely independent.

        double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
        validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
            fakeLookupModeStr);

        // For checking the lower bound we need to eliminate the last chunk,
        // because it is frequently smaller and the false positive rate in it
        // is too low. This does not help if there is only one under-sized
        // chunk, though.
        int nChunks = cbf.getNumChunks();
        if (nChunks > 1) {
          numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
          nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
          falsePosRate = numFalsePos * 1.0 / nTrials;
          LOG.info(testIdMsg + " False positive rate without last chunk is " +
              falsePosRate + fakeLookupModeStr);
        }

        validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
            fakeLookupModeStr);
      } finally {
        // Always restore production lookup mode, even if a check failed.
        ByteBloomFilter.setFakeLookupMode(false);
      }
    }
  } finally {
    // Release the reader on every exit path, not only on success.
    r.close(true); // end of test so evictOnClose
  }
}