Java 类org.apache.hadoop.hbase.util.CompoundBloomFilter 实例源码

项目:ditb    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:LCIndex-HBase-0.94.16    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:pbase    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:HIndex    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:IRIndex    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:PyroDB    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:c5    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:HBase-Research    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:DominoHBase    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:hindex    文件:TestCompoundBloomFilter.java   
/**
 * Validates the false positive ratio by computing its z-value and comparing
 * it to the provided threshold.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials,
    double zValueBoundary, CompoundBloomFilter cbf, String additionalMsg) {
  double p = BloomFilterFactory.getErrorRate(conf);
  double zValue = (falsePosRate - p) / Math.sqrt(p * (1 - p) / nTrials);

  String assortedStatsStr = " (targetErrorRate=" + p + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);

  boolean isUpperBound = zValueBoundary > 0;

  if (isUpperBound && zValue > zValueBoundary ||
      !isUpperBound && zValue < zValueBoundary) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary
        + assortedStatsStr + ". Per-chunk stats:\n"
        + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
项目:ditb    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:LCIndex-HBase-0.94.16    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:pbase    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:HIndex    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:IRIndex    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:PyroDB    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:c5    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:HBase-Research    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:hbase-0.94.8-qod    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:DominoHBase    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
项目:hindex    文件:TestCompoundBloomFilter.java   
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}