Java 类org.apache.hadoop.util.bloom.Key 实例源码

项目:DStream    文件:SplitterBolt.java   
/**
 * Handles one incoming tuple and acknowledges it.
 *
 * Tuples whose source component equals {@code UPSTREAM_COMPONENT_ID} carry a
 * word: a non-empty word is emitted (un-anchored) on the
 * {@code Constraints.coinFileds} stream and then (anchored to the input) on
 * either {@code Constraints.hotFileds} or {@code Constraints.nohotFileds},
 * depending on whether the filter {@code bf} reports the word as a member.
 *
 * Tuples from any other component carry a word and an integer type and are
 * used to update {@code bf}: type 1 adds a word the filter does not yet
 * report, type 0 deletes a word the filter currently reports.
 *
 * @param tuple the tuple to process
 */
public void execute(Tuple tuple) {
    final boolean fromUpstream = tuple.getSourceComponent().equals(UPSTREAM_COMPONENT_ID);

    if (!fromUpstream) {
        // Filter-maintenance tuple: adjust the filter, then acknowledge.
        final String changedWord = tuple.getStringByField(Constraints.wordFileds);
        final Integer changeType = tuple.getIntegerByField(Constraints.typeFileds);
        final Key changedKey = new Key(changedWord.getBytes());

        if (!bf.membershipTest(changedKey) && changeType.equals(1)) {
            bf.add(changedKey);
        }
        if (bf.membershipTest(changedKey) && changeType.equals(0)) {
            bf.delete(changedKey);
        }
        collector.ack(tuple);
        return;
    }

    final String word = tuple.getStringByField(UPSTREAM_FIEDLS);
    if (word.length() > 0) {
        collector.emit(Constraints.coinFileds, new Values(word));

        // Route the word according to the filter's verdict.
        final Key wordKey = new Key(word.getBytes());
        if (bf.membershipTest(wordKey)) {
            collector.emit(Constraints.hotFileds, tuple, new Values(word));
        } else {
            collector.emit(Constraints.nohotFileds, tuple, new Values(word));
        }
    }
    // Empty words are acknowledged without emitting anything.
    collector.ack(tuple);
}
项目:Gaffer    文件:FilterWritabilityTest.java   
/**
 * Writes a populated filter, passes the bytes through a String using
 * {@code AccumuloStoreConstants.BLOOM_FILTER_CHARSET}, reads it back into a
 * new filter and checks the membership answers of the restored filter.
 */
@Test
public void shouldWriteAndReadFilter() throws IOException {
    // Given
    final BloomFilter original = new BloomFilter(100, 5, Hash.MURMUR_HASH);
    original.add(new Key("ABC".getBytes()));
    original.add(new Key("DEF".getBytes()));
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    original.write(new DataOutputStream(buffer));
    final String encoded = new String(buffer.toByteArray(), AccumuloStoreConstants.BLOOM_FILTER_CHARSET);
    final byte[] decoded = encoded.getBytes(AccumuloStoreConstants.BLOOM_FILTER_CHARSET);

    // When
    final BloomFilter restored = new BloomFilter();
    restored.readFields(new DataInputStream(new ByteArrayInputStream(decoded)));

    // Then
    final Key abc = new Key("ABC".getBytes());
    final Key def = new Key("DEF".getBytes());
    final Key absent = new Key("lkjhgfdsa".getBytes());
    assertTrue(restored.membershipTest(abc));
    assertTrue(restored.membershipTest(def));
    assertFalse(restored.membershipTest(absent));
}
项目:compiler    文件:UniqueAggregator.java   
/**
 * {@inheritDoc}
 *
 * Data already reported by the filter is ignored. Otherwise the data is
 * added to the filter and then either collected (when combining) or
 * counted in {@code total}.
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    final Key candidate = new Key(data.getBytes());

    // Only data the filter has not seen yet is recorded.
    if (!this.filter.membershipTest(candidate)) {
        this.filter.add(candidate);

        if (this.isCombining()) {
            this.collect(data);
        } else {
            this.total++;
        }
    }
}
项目:compiler    文件:DistinctAggregator.java   
/**
 * {@inheritDoc}
 *
 * Data already reported by the filter is ignored; any other data is added
 * to the filter and collected.
 */
@Override
public void aggregate(final String data, final String metadata) throws IOException, InterruptedException {
    final Key candidate = new Key(data.getBytes());

    // Record and collect only what the filter has not seen yet.
    if (!this.filter.membershipTest(candidate)) {
        this.filter.add(candidate);
        this.collect(data);
    }
}
项目:spork-streaming    文件:BuildBloom.java   
/**
 * Builds a new Bloom filter containing a single key taken from the first
 * tuple of the bag in field 0 of {@code input}.
 *
 * A one-field tuple contributes the bytes of that field; a tuple with any
 * other number of fields is serialized whole. The first tuple is fetched
 * with {@code Iterator.next()}, so the bag is expected to be non-empty.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when
 *         {@code input} is null or has no fields
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Strip off the initial level of bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    final byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);
    final Key key = new Key(keyBytes);

    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
项目:spork    文件:BuildBloom.java   
/**
 * Builds a new Bloom filter containing a single key taken from the first
 * tuple of the bag in field 0 of {@code input}.
 *
 * A one-field tuple contributes the bytes of that field; a tuple with any
 * other number of fields is serialized whole. The first tuple is fetched
 * with {@code Iterator.next()}, so the bag is expected to be non-empty.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when
 *         {@code input} is null or has no fields
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Strip off the initial level of bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    final byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);
    final Key key = new Key(keyBytes);

    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
项目:hadoop-map-reduce-patterns    文件:BloomFilter.java   
/**
 * Writes the input record once if any whitespace-separated token of its
 * "Text" attribute is reported as a member by {@code filter}.
 *
 * Records whose "Text" attribute is null or empty are skipped.
 */
@Override
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    final Map<String, String> attributes = transformXmlToMap(value.toString());

    final String text = attributes.get("Text");
    if (isNullOrEmpty(text)) {
        return;
    }

    for (StringTokenizer tokens = new StringTokenizer(text); tokens.hasMoreTokens();) {
        final Key candidate = new Key(tokens.nextToken().getBytes());
        if (filter.membershipTest(candidate)) {
            // One matching token is enough; emit the record and stop.
            context.write(value, NullWritable.get());
            return;
        }
    }
}
项目:PonIC    文件:BuildBloom.java   
/**
 * Builds a new Bloom filter containing a single key taken from the first
 * tuple of the bag in field 0 of {@code input}.
 *
 * A one-field tuple contributes the bytes of that field; a tuple with any
 * other number of fields is serialized whole. The first tuple is fetched
 * with {@code Iterator.next()}, so the bag is expected to be non-empty.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when
 *         {@code input} is null or has no fields
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Strip off the initial level of bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    final byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);
    final Key key = new Key(keyBytes);

    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
项目:sedge    文件:BuildBloom.java   
/**
 * Builds a new Bloom filter containing a single key taken from the first
 * tuple of the bag in field 0 of {@code input}.
 *
 * A one-field tuple contributes the bytes of that field; a tuple with any
 * other number of fields is serialized whole. The first tuple is fetched
 * with {@code Iterator.next()}, so the bag is expected to be non-empty.
 *
 * @param input tuple whose first field is a bag of tuples
 * @return a tuple wrapping {@code bloomOut()}, or {@code null} when
 *         {@code input} is null or has no fields
 */
@Override
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
        return null;
    }

    // Strip off the initial level of bag and take its first tuple.
    final DataBag bag = (DataBag) input.get(0);
    final Tuple first = bag.iterator().next();

    final byte[] keyBytes = (first.size() == 1)
            ? DataType.toBytes(first.get(0))
            : DataType.toBytes(first, DataType.TUPLE);
    final Key key = new Key(keyBytes);

    filter = new BloomFilter(vSize, numHash, hType);
    filter.add(key);

    return TupleFactory.getInstance().newTuple(bloomOut());
}
项目:hiped2    文件:BloomJoin.java   
/**
 * Prints the key, then forwards the key/value pair only when
 * {@code filter} reports the key's string form as a member.
 */
@Override
protected void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("K[" + key + "]");

  final Key probe = new Key(key.toString().getBytes());
  if (!filter.membershipTest(probe)) {
    return;
  }
  context.write(key, value);
}
项目:hiped2    文件:BloomFilterCreator.java   
/**
 * Prints the key, parses the value as an integer age and adds the key to
 * {@code filter} when the age is greater than 30. The output collector is
 * then stored in the {@code collector} field.
 */
@Override
public void map(Text key, Text value,
                OutputCollector<NullWritable, BloomFilter> output,
                Reporter reporter) throws IOException {

  System.out.println("K[" + key + "]");

  final int age = Integer.parseInt(value.toString());
  final boolean overThreshold = age > 30;
  if (overThreshold) {
    final Key member = new Key(key.toString().getBytes());
    filter.add(member);
  }
  collector = output;
}
项目:hiped2    文件:BloomJoin.java   
/**
 * Extracts the user name from the record and, when {@code filter} reports
 * it as a member, writes the record keyed by user name. The written tuple
 * carries the dataset id and the raw record text.
 */
@Override
protected void map(LongWritable offset, Text value, Context context)
    throws IOException, InterruptedException {
  final String user = getUsername(value);
  final Key probe = new Key(user.getBytes());
  if (!filter.membershipTest(probe)) {
    return;
  }

  final Tuple outputValue = new Tuple();
  outputValue.setInt(ValueFields.DATASET, getDataset());
  outputValue.setString(ValueFields.DATA, value.toString());

  final Text outputKey = new Text(user);
  context.write(outputKey, outputValue);
}
项目:hiped2    文件:BloomFilterCreator.java   
/**
 * Parses a user from the record and adds the user's name to
 * {@code filter} when the user's state equals "CA".
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
  final User user = User.fromText(value);
  final boolean stateMatches = "CA".equals(user.getState());
  if (!stateMatches) {
    return;
  }
  final Key member = new Key(user.getName().getBytes());
  filter.add(member);
}
项目:Gaffer    文件:FilterWritabilityTest.java   
/**
 * Adds two keys to a fresh filter and checks that both are reported as
 * members while a key that was never added is not.
 */
@Test
public void shouldAcceptValidFilter() {
    // Given
    final BloomFilter bloom = new BloomFilter(100, 5, Hash.MURMUR_HASH);
    final Key abc = new Key("ABC".getBytes());
    final Key def = new Key("DEF".getBytes());
    bloom.add(abc);
    bloom.add(def);

    // Then
    final Key firstProbe = new Key("ABC".getBytes());
    final Key secondProbe = new Key("DEF".getBytes());
    final Key absentProbe = new Key("lkjhgfdsa".getBytes());
    assertTrue(bloom.membershipTest(firstProbe));
    assertTrue(bloom.membershipTest(secondProbe));
    assertFalse(bloom.membershipTest(absentProbe));
}
项目:spring-usc    文件:KR2RMLBloomFilterManager.java   
/**
 * Adds {@code uri} to the Bloom filter registered under {@code id},
 * registering a new filter with the default vector size, default hash
 * count and Jenkins hash first when no filter is registered for that id.
 *
 * @param id  key of the filter in {@code idToBloomFilter}
 * @param uri value to add; encoded with {@code UTF8_CHARSET}
 */
public void addUriToBloomFilter(String id, String uri) {
    if (!idToBloomFilter.containsKey(id)) {
        final KR2RMLBloomFilter fresh = new KR2RMLBloomFilter(
                KR2RMLBloomFilter.defaultVectorSize,
                KR2RMLBloomFilter.defaultnbHash,
                Hash.JENKINS_HASH);
        // putIfAbsent keeps a filter that appeared since the check above.
        idToBloomFilter.putIfAbsent(id, fresh);
    }

    final KR2RMLBloomFilter target = idToBloomFilter.get(id);
    final Key uriKey = new Key(uri.getBytes(UTF8_CHARSET));
    target.add(uriKey);
}
项目:spork-streaming    文件:Bloom.java   
/**
 * Tests the input against the Bloom filter, calling {@code init()} first
 * when {@code filter} is still null.
 *
 * A one-field input is keyed by the bytes of that field; any other input
 * is serialized whole as a tuple.
 *
 * @param input the tuple to look up
 * @return the filter's membership answer for the derived key
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] keyBytes = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(keyBytes));
}
项目:spork    文件:Bloom.java   
/**
 * Tests the input against the Bloom filter, calling {@code init()} first
 * when {@code filter} is still null.
 *
 * A one-field input is keyed by the bytes of that field; any other input
 * is serialized whole as a tuple.
 *
 * @param input the tuple to look up
 * @return the filter's membership answer for the derived key
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] keyBytes = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(keyBytes));
}
项目:cbflume    文件:CBMessageFilter.java   
/**
 * Creates the message filter from a comma-separated list of keys.
 *
 * When filtering is enabled, every non-blank token of
 * {@code filterPattern} is added to {@code filter}. Tokens are trimmed
 * before being added, so a pattern such as {@code "a, b"} registers
 * {@code "b"} rather than {@code " b"}; this matches the blank check,
 * which is also performed on the trimmed token.
 *
 * @param filterEnabled whether filtering is active; when false the
 *                      pattern is not read
 * @param filterPattern comma-separated keys to register
 */
public CBMessageFilter(boolean filterEnabled, String filterPattern){
    this.filterEnabled=filterEnabled;

    if(this.filterEnabled){
        for(String rawKey : filterPattern.split(",")){
            // Validate and insert the same (trimmed) text.
            String trimmedKey = rawKey.trim();
            if(trimmedKey.length()>0){
                filter.add(new Key(trimmedKey.getBytes()));
            }
        }
    }
}
项目:cbflume    文件:CBMessageFilter.java   
/**
 * Reports whether {@code key} passes the filter.
 *
 * @param key the key to look up; may be null
 * @return true when filtering is disabled; otherwise false for a null key
 *         and the filter's membership answer for any other key
 */
public boolean membershiptest(String key){
    // Everything passes when the filter is not enabled.
    if(this.filterEnabled){
        return key!=null && filter.membershipTest(new Key(key.getBytes()));
    }
    return true;
}
项目:hadoop-map-reduce-patterns    文件:ReduceSideJoinBloomFilter.java   
/**
 * Emits the record, keyed by its "UserId" attribute and prefixed with "B",
 * when {@code bfilter} reports that user id as a member. Records without
 * a "UserId" attribute are skipped.
 */
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
    final String record = value.toString();
    final Map<String, String> attributes = MRDPUtils.transformXmlToMap(record);

    final String userId = attributes.get("UserId");
    if (userId == null) {
        return;
    }

    final Key probe = new Key(userId.getBytes());
    if (!bfilter.membershipTest(probe)) {
        return;
    }

    outkey.set(userId);
    outvalue.set("B" + value.toString());
    context.write(outkey, outvalue);
}
项目:PonIC    文件:Bloom.java   
/**
 * Tests the input against the Bloom filter, calling {@code init()} first
 * when {@code filter} is still null.
 *
 * A one-field input is keyed by the bytes of that field; any other input
 * is serialized whole as a tuple.
 *
 * @param input the tuple to look up
 * @return the filter's membership answer for the derived key
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] keyBytes = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(keyBytes));
}
项目:sedge    文件:Bloom.java   
/**
 * Tests the input against the Bloom filter, calling {@code init()} first
 * when {@code filter} is still null.
 *
 * A one-field input is keyed by the bytes of that field; any other input
 * is serialized whole as a tuple.
 *
 * @param input the tuple to look up
 * @return the filter's membership answer for the derived key
 */
@Override
public Boolean exec(Tuple input) throws IOException {
    if (filter == null) {
        init();
    }

    final byte[] keyBytes = (input.size() == 1)
            ? DataType.toBytes(input.get(0))
            : DataType.toBytes(input, DataType.TUPLE);

    return filter.membershipTest(new Key(keyBytes));
}
项目:bloomfilter-course    文件:Tester.java   
/**
 * Checks every line of {@code input} against the serialized Bloom filter
 * stored at {@code bloom}, confirms each filter hit against the Redis set
 * {@code REDIS_SET_KEY}, and prints timing and hit statistics.
 *
 * The filter input stream, the line reader and the Redis connection are
 * released in {@code finally} blocks so they do not leak when reading or
 * the Redis lookup fails.
 *
 * @param input path of the text file whose lines are tested
 * @param bloom path of the serialized Bloom filter
 * @throws Exception if the file system, the filter file, the input file or
 *                   Redis cannot be accessed
 */
public void redisMembershipTestWithFilter(Path input, Path bloom)
        throws Exception {

    System.out.println("Testing Redis set membership of " + input
            + " using a Bloom filter " + bloom);

    FileSystem fs = FileSystem.get(getConf());

    // Connect to Redis at localhost, port 6379
    jedis = new Jedis("localhost", 6379);
    jedis.connect();

    int numBFhits = 0, numhits = 0, numlines = 0;
    long start;
    long finish;
    BufferedReader rdr = null;
    try {
        // Deserialize the Bloom filter and close its stream right away.
        BloomFilter filter = new BloomFilter();
        java.io.DataInputStream bloomIn = fs.open(bloom);
        try {
            filter.readFields(bloomIn);
        } finally {
            bloomIn.close();
        }

        rdr = new BufferedReader(new InputStreamReader(fs.open(input)));

        // A single Key instance is re-used for every line.
        Key key = new Key();
        String line;

        start = System.currentTimeMillis();
        while ((line = rdr.readLine()) != null) {
            ++numlines;

            // Set the key to the line's bytes with a weight of 1.0
            key.set(line.getBytes(), 1.0);

            if (filter.membershipTest(key)) {
                ++numBFhits;

                // Only filter hits are confirmed against Redis.
                if (jedis.sismember(REDIS_SET_KEY, line)) {
                    ++numhits;
                }
            }
        }
        finish = System.currentTimeMillis();
    } finally {
        // Release the reader and the Redis client even on failure.
        try {
            if (rdr != null) {
                rdr.close();
            }
        } finally {
            jedis.disconnect();
        }
    }

    System.out.println("Took " + (finish - start) + " ms to check Redis "
            + numlines + " times for " + numhits
            + " successful tests.  Bloom filter hits: " + numBFhits
            + " False postives: " + (numBFhits - numhits));
}
项目:bloomfilter-course    文件:Trainer.java   
/**
 * Trains a Bloom filter and a Redis set from the non-empty lines of an
 * input file, then serializes the filter to the given output path.
 *
 * Expected arguments: {@code <totrain> <nummembers> <falseposrate> <bfoutfile>}.
 * The line reader, the Redis client and the output stream are released in
 * {@code finally} blocks so they do not leak when training or writing
 * fails.
 *
 * @param args command line arguments as described above
 * @return 0 on success, 1 when the argument count is wrong
 * @throws Exception if the arguments cannot be parsed or the file system
 *                   or Redis cannot be accessed
 */
@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        System.err
                .println("Usage: Trainer <totrain> <nummembers> <falseposrate> <bfoutfile>");
        return 1;
    }

    // Parse command line arguments
    Path inputFile = new Path(args[0]);
    int numMembers = Integer.parseInt(args[1]);
    float falsePosRate = Float.parseFloat(args[2]);
    Path bfFile = new Path(args[3]);

    // Redis client on localhost, port 6379
    jedis = new Jedis("localhost", 6379);

    BloomFilter filter;
    FileSystem fs;
    int numRecords = 0;
    BufferedReader rdr = null;
    try {
        // Start from an empty Redis set
        jedis.del(REDIS_SET_KEY);

        filter = createBloomFilter(numMembers, falsePosRate);

        fs = FileSystem.get(getConf());
        rdr = new BufferedReader(new InputStreamReader(
                fs.open(inputFile)));

        String line;
        while ((line = rdr.readLine()) != null) {
            if (!line.isEmpty()) {
                // Each non-empty line goes into both the filter and Redis.
                filter.add(new Key(line.getBytes()));
                jedis.sadd(REDIS_SET_KEY, line);
                ++numRecords;
            }
        }
    } finally {
        // Release the reader and the Redis client even on failure.
        try {
            if (rdr != null) {
                rdr.close();
            }
        } finally {
            jedis.disconnect();
        }
    }

    System.out.println("Trained Bloom filter with " + numRecords
            + " entries.");

    System.out.println("Serializing Bloom filter to HDFS at " + bfFile);

    FSDataOutputStream strm = fs.create(bfFile);
    try {
        filter.write(strm);
        strm.flush();
    } finally {
        strm.close();
    }

    System.out.println("Done training Bloom filter.");
    return 0;
}
项目:bloomfilter-course    文件:Tester.java   
/**
 * Exercise skeleton: intended to test each line of {@code input} against a
 * Bloom filter read from {@code bloom} and against Redis, then print
 * timing and hit statistics.
 *
 * Every step marked TODO is still unimplemented. As written, the counters
 * stay at 0 and the loop condition dereferences a null reader.
 *
 * @param input path of the file whose lines are to be tested
 * @param bloom path of the serialized Bloom filter
 * @throws Exception declared for the I/O and Redis work the TODOs describe
 */
public void redisMembershipTestWithFilter(Path input, Path bloom)
        throws Exception {

    System.out.println("Testing Redis set membership of " + input
            + " using a Bloom filter " + bloom);

    // TODO create a fileSystem object

    // TODO connect to Redis at localhost, port 6379

    // TODO Create a new BloomFilter object

    // TODO call readFields with an FSDataInputStream from the file

    // TODO Open the testing file for read
    String line = null;
    int numBFhits = 0, numhits = 0, numlines = 0;
    // NOTE(review): rdr is a null placeholder; rdr.readLine() below throws
    // NullPointerException until the "open the testing file" TODO is done.
    BufferedReader rdr = null;

    // TODO create a new Key to re-use
    Key key = new Key();

    long start = System.currentTimeMillis();
    while ((line = rdr.readLine()) != null) {
        // TODO increment numlines

        // TODO set the bytes of the key to line's bytes with a weight of 1.0

        // TODO membership test the key
            // TODO increment numBFhits

            // TODO test jedis using sismember
                // TODO increment numhits
    }
    long finish = System.currentTimeMillis();

    // TODO close the file reader and Redis client

    // Reports elapsed time and the counters gathered by the loop above.
    System.out.println("Took " + (finish - start) + " ms to check Redis "
            + numlines + " times for " + numhits
            + " successful tests.  Bloom filter hits: " + numBFhits
            + " False postives: " + (numBFhits - numhits));
}
项目:telepath    文件:InBloomFilterMapper.java   
/**
 * Builds a Bloom filter key from a copy of the first
 * {@code t.getLength()} bytes of {@code t.getBytes()}.
 *
 * @param t the text to convert
 * @return a key over the copied bytes
 */
public static Key toKey(Text t) {
    final byte[] content = Arrays.copyOf(t.getBytes(), t.getLength());
    return new Key(content);
}
项目:telepath    文件:InBloomFilterMapper.java   
/**
 * Builds a Bloom filter key from a string by wrapping it in a
 * {@code Text} and delegating to {@code toKey(Text)}.
 *
 * @param s the string to convert
 * @return the key for the string
 */
public static Key toKey(String s) {
    final Text wrapped = new Text(s);
    return toKey(wrapped);
}