Example source code for the Java class org.apache.hadoop.hbase.util.ByteBloomFilter

Project: ditb    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
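
The sizing test above round-trips a chunk's bit budget through the two static helpers it calls. The short standalone sketch below (class and variable names are illustrative, and it assumes the same helpers shown above are on the classpath) prints the ratio the assertion checks: converting a chunk size to a key budget with idealMaxKeys() and back with computeBitSize() recovers just under 100% of the original bits.

import org.apache.hadoop.hbase.util.ByteBloomFilter;

public class BloomSizingRoundTrip {
  public static void main(String[] args) {
    int chunkBytes = 128 * 1024;      // hypothetical Bloom chunk of 128 KB
    long chunkBits = chunkBytes * 8L;
    double errorRate = 0.01;

    // How many keys fit in this many bits at the target error rate...
    long maxKeys = ByteBloomFilter.idealMaxKeys(chunkBits, errorRate);
    // ...and how many bits that key budget needs when sized back.
    long recoveredBits = ByteBloomFilter.computeBitSize(maxKeys, errorRate);

    // Expect a value just under 1.0 (about 0.9999), as asserted above.
    System.out.println("maxKeys=" + maxKeys
        + " ratio=" + (recoveredBits * 1.0 / chunkBits));
  }
}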
Project: LCIndex-HBase-0.94.16    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: pbase    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: HIndex    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: IRIndex    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: PyroDB    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: c5    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: HBase-Research    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: hbase-0.94.8-qod    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: DominoHBase    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
Project: hindex    File: TestCompoundBloomFilter.java
@Test
public void testCompoundBloomSizing() {
  int bloomBlockByteSize = 4096;
  int bloomBlockBitSize = bloomBlockByteSize * 8;
  double targetErrorRate = 0.01;
  long maxKeysPerChunk = ByteBloomFilter.idealMaxKeys(bloomBlockBitSize,
      targetErrorRate);

  long bloomSize1 = bloomBlockByteSize * 8;
  long bloomSize2 = ByteBloomFilter.computeBitSize(maxKeysPerChunk,
      targetErrorRate);

  double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);
  assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);
}
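
All of the snippets above touch only ByteBloomFilter's static sizing helpers. For context, the sketch below shows direct use of the class itself; it assumes the 0.94-era constructor ByteBloomFilter(int maxKeys, double errorRate, int hashType, int foldFactor) and the allocBloom()/add()/contains() methods, so check the exact signatures against the HBase version in your project.

import org.apache.hadoop.hbase.util.ByteBloomFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;

public class ByteBloomFilterBasics {
  public static void main(String[] args) {
    // Budget for 1000 keys at a 1% target false-positive rate, no folding.
    // Constructor signature is assumed from the 0.94-era API.
    ByteBloomFilter bloom = new ByteBloomFilter(1000, 0.01, Hash.MURMUR_HASH, 0);
    bloom.allocBloom();                      // allocate the backing bit array

    bloom.add(Bytes.toBytes("row-0001"));
    bloom.add(Bytes.toBytes("row-0002"));

    // Added keys must always test positive (no false negatives); absent keys
    // should usually test negative, subject to the configured error rate.
    System.out.println(bloom.contains(Bytes.toBytes("row-0001"))); // true
    System.out.println(bloom.contains(Bytes.toBytes("row-9999"))); // usually false
  }
}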
Project: ditb    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
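
validateFalsePosRate() is not shown in these snippets, but the z-value bounds passed to it (1.96, 2.5, -2.58) suggest a normal-approximation test of the observed false-positive rate against the configured one. The sketch below is a hypothetical reconstruction of that style of check, not the test's actual helper; the method name, the fixed expected rate, and the sample numbers are assumptions.

public final class FalsePositiveCheck {
  private FalsePositiveCheck() {}

  /**
   * z-score of an observed false-positive rate against an expected rate,
   * using the normal approximation to the binomial distribution:
   * z = (observed - expected) / sqrt(expected * (1 - expected) / nTrials).
   */
  static double zScore(double observedRate, double expectedRate, int nTrials) {
    double stdDev = Math.sqrt(expectedRate * (1 - expectedRate) / nTrials);
    return (observedRate - expectedRate) / stdDev;
  }

  public static void main(String[] args) {
    double expected = 0.01;            // target error rate of the Bloom filter
    int nTrials = 200_000;             // number of random membership queries
    double observed = 2_080.0 / nTrials;

    double z = zScore(observed, expected, nTrials);
    // A positive bound (e.g. 1.96) rejects rates that are too high; a negative
    // bound (e.g. -2.58) flags rates that are suspiciously low.
    System.out.printf("observed=%.5f z=%.2f%n", observed, z);
  }
}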
Project: LCIndex-HBase-0.94.16    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: pbase    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: HIndex    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: IRIndex    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: RStore    File: HFilePrettyPrinter.java
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
    throws IOException {
  System.out.println("Block index size as per heapsize: "
      + reader.indexSize());
  System.out.println(asSeparateLines(reader.toString()));
  System.out.println("Trailer:\n    "
      + asSeparateLines(reader.getTrailer().toString()));
  System.out.println("Fileinfo:");
  for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
    System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
    if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
      long seqid = Bytes.toLong(e.getValue());
      System.out.println(seqid);
    } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(e.getValue(), timeRangeTracker);
      System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
    } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
        || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
      System.out.println(Bytes.toInt(e.getValue()));
    } else {
      System.out.println(Bytes.toStringBinary(e.getValue()));
    }
  }

  System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));

  // Printing bloom information
  DataInput bloomMeta = reader.getBloomFilterMetadata();
  BloomFilter bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
        ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }
}
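
printMeta() receives an already-open HFile.Reader and its file-info map. The sketch below shows one way those inputs might be obtained, assuming the 0.92/0.94-era HFile.createReader(FileSystem, Path, CacheConfig) factory and Reader.loadFileInfo(); newer HBase versions take additional arguments, so adjust to the reader API you actually have.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;

public class OpenHFileForMeta {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path hfilePath = new Path(args[0]);   // path to an existing HFile

    // Reader factory signature is assumed from the 0.92/0.94-era API.
    HFile.Reader reader = HFile.createReader(fs, hfilePath, new CacheConfig(conf));
    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

    // These two values are what printMeta(reader, fileInfo) above consumes.
    System.out.println("entries=" + reader.getEntries()
        + ", fileInfo keys=" + fileInfo.size());
    reader.close();
  }
}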
Project: PyroDB    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: c5    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: HBase-Research    File: HFilePrettyPrinter.java
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
    throws IOException {
  System.out.println("Block index size as per heapsize: "
      + reader.indexSize());
  System.out.println(asSeparateLines(reader.toString()));
  System.out.println("Trailer:\n    "
      + asSeparateLines(reader.getTrailer().toString()));
  System.out.println("Fileinfo:");
  for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
    System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
    if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
      long seqid = Bytes.toLong(e.getValue());
      System.out.println(seqid);
    } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(e.getValue(), timeRangeTracker);
      System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
    } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
        || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
      System.out.println(Bytes.toInt(e.getValue()));
    } else {
      System.out.println(Bytes.toStringBinary(e.getValue()));
    }
  }

  System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));

  // Printing general bloom information
  DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
  BloomFilter bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
        ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }

  // Printing delete bloom information
  bloomMeta = reader.getDeleteBloomFilterMetadata();
  bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Delete Family Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES
        + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
            "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }
}
Project: HBase-Research    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: hbase-0.94.8-qod    File: HFilePrettyPrinter.java
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
    throws IOException {
  System.out.println("Block index size as per heapsize: "
      + reader.indexSize());
  System.out.println(asSeparateLines(reader.toString()));
  System.out.println("Trailer:\n    "
      + asSeparateLines(reader.getTrailer().toString()));
  System.out.println("Fileinfo:");
  for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
    System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
    if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
      long seqid = Bytes.toLong(e.getValue());
      System.out.println(seqid);
    } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(e.getValue(), timeRangeTracker);
      System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
    } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
        || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
      System.out.println(Bytes.toInt(e.getValue()));
    } else {
      System.out.println(Bytes.toStringBinary(e.getValue()));
    }
  }

  System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));

  // Printing general bloom information
  DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
  BloomFilter bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
        ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }

  // Printing delete bloom information
  bloomMeta = reader.getDeleteBloomFilterMetadata();
  bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Delete Family Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES
        + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
            "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }
}
Project: hbase-0.94.8-qod    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: DominoHBase    File: HFilePrettyPrinter.java
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
    throws IOException {
  System.out.println("Block index size as per heapsize: "
      + reader.indexSize());
  System.out.println(asSeparateLines(reader.toString()));
  System.out.println("Trailer:\n    "
      + asSeparateLines(reader.getTrailer().toString()));
  System.out.println("Fileinfo:");
  for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
    System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
    if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
      long seqid = Bytes.toLong(e.getValue());
      System.out.println(seqid);
    } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(e.getValue(), timeRangeTracker);
      System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
    } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
        || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
      System.out.println(Bytes.toInt(e.getValue()));
    } else {
      System.out.println(Bytes.toStringBinary(e.getValue()));
    }
  }

  System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));

  // Printing general bloom information
  DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
  BloomFilter bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
        ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }

  // Printing delete bloom information
  bloomMeta = reader.getDeleteBloomFilterMetadata();
  bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Delete Family Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES
        + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
            "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }
}
Project: DominoHBase    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}
Project: hindex    File: HFilePrettyPrinter.java
private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
    throws IOException {
  System.out.println("Block index size as per heapsize: "
      + reader.indexSize());
  System.out.println(asSeparateLines(reader.toString()));
  System.out.println("Trailer:\n    "
      + asSeparateLines(reader.getTrailer().toString()));
  System.out.println("Fileinfo:");
  for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
    System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
    if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
      long seqid = Bytes.toLong(e.getValue());
      System.out.println(seqid);
    } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
      TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
      Writables.copyWritable(e.getValue(), timeRangeTracker);
      System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
          + timeRangeTracker.getMaximumTimestamp());
    } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
        || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
      System.out.println(Bytes.toInt(e.getValue()));
    } else {
      System.out.println(Bytes.toStringBinary(e.getValue()));
    }
  }

  System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));

  // Printing general bloom information
  DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
  BloomFilter bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
        ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }

  // Printing delete bloom information
  bloomMeta = reader.getDeleteBloomFilterMetadata();
  bloomFilter = null;
  if (bloomMeta != null)
    bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);

  System.out.println("Delete Family Bloom filter:");
  if (bloomFilter != null) {
    System.out.println(FOUR_SPACES
        + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
            "\n" + FOUR_SPACES));
  } else {
    System.out.println(FOUR_SPACES + "Not present");
  }
}
Project: hindex    File: TestCompoundBloomFilter.java
private void readStoreFile(int t, BloomType bt, List<KeyValue> kvs,
    Path sfPath) throws IOException {
  StoreFile sf = new StoreFile(fs, sfPath, conf, cacheConf, bt,
      NoOpDataBlockEncoder.INSTANCE);
  StoreFile.Reader r = sf.createReader();
  final boolean pread = true; // does not really matter
  StoreFileScanner scanner = r.getStoreFileScanner(true, pread);

  {
    // Test for false negatives (not allowed).
    int numChecked = 0;
    for (KeyValue kv : kvs) {
      byte[] row = kv.getRow();
      boolean present = isInBloom(scanner, row, kv.getQualifier());
      assertTrue(testIdMsg + " Bloom filter false negative on row "
          + Bytes.toStringBinary(row) + " after " + numChecked
          + " successful checks", present);
      ++numChecked;
    }
  }

  // Test for false positives (some percentage allowed). We test in two modes:
  // "fake lookup" which ignores the key distribution, and production mode.
  for (boolean fakeLookupEnabled : new boolean[] { true, false }) {
    ByteBloomFilter.setFakeLookupMode(fakeLookupEnabled);
    try {
      String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?
          "enabled" : "disabled");
      CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();
      cbf.enableTestingStats();
      int numFalsePos = 0;
      Random rand = new Random(EVALUATION_SEED);
      int nTrials = NUM_KV[t] * 10;
      for (int i = 0; i < nTrials; ++i) {
        byte[] query = TestHFileWriterV2.randomRowOrQualifier(rand);
        if (isInBloom(scanner, query, bt, rand)) {
          numFalsePos += 1;
        }
      }
      double falsePosRate = numFalsePos * 1.0 / nTrials;
      LOG.debug(String.format(testIdMsg
          + " False positives: %d out of %d (%f)",
          numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);

      // Check for obvious Bloom filter crashes.
      assertTrue("False positive is too high: " + falsePosRate + " (greater "
          + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,
          falsePosRate < TOO_HIGH_ERROR_RATE);

      // Now a more precise check to see if the false positive rate is not
      // too high. The reason we use a relaxed restriction for the real-world
      // case as opposed to the "fake lookup" case is that our hash functions
      // are not completely independent.

      double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;
      validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,
          fakeLookupModeStr);

      // For checking the lower bound we need to eliminate the last chunk,
      // because it is frequently smaller and the false positive rate in it
      // is too low. This does not help if there is only one under-sized
      // chunk, though.
      int nChunks = cbf.getNumChunks();
      if (nChunks > 1) {
        numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);
        nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);
        falsePosRate = numFalsePos * 1.0 / nTrials;
        LOG.info(testIdMsg + " False positive rate without last chunk is " +
            falsePosRate + fakeLookupModeStr);
      }

      validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,
          fakeLookupModeStr);
    } finally {
      ByteBloomFilter.setFakeLookupMode(false);
    }
  }

  r.close(true); // end of test so evictOnClose
}