/**
 * Encodes a buffer of key/values and returns the result wrapped in a new
 * {@link ByteBuffer}.
 * <p>
 * The returned bytes are, in order: the given dummy header, whatever
 * {@code algo.writeIdInBytes} emits, and the output of the encoder's
 * {@code compressKeyValues}.
 *
 * @param in input data to encode
 * @param algo encoding algorithm
 * @param includesMemstoreTS flag passed through to the encoder
 * @param dummyHeader bytes written verbatim at the start of the output
 * @return buffer wrapping the header, encoding id and encoded data
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *           writing
 */
private ByteBuffer encodeBufferToHFileBlockBuffer(ByteBuffer in,
    DataBlockEncoding algo, boolean includesMemstoreTS, byte[] dummyHeader) {
  final ByteArrayOutputStream sink = new ByteArrayOutputStream();
  final DataOutputStream out = new DataOutputStream(sink);
  final DataBlockEncoder blockEncoder = algo.getEncoder();

  try {
    // Header goes straight to the byte sink; everything else goes through
    // the data stream layered on top of it.
    sink.write(dummyHeader);
    algo.writeIdInBytes(out);
    blockEncoder.compressKeyValues(out, in, includesMemstoreTS);
  } catch (IOException ioe) {
    final String message = String.format(
        "Bug in data block encoder '%s', it probably requested too much data",
        algo.toString());
    throw new RuntimeException(message, ioe);
  }

  final byte[] encodedBytes = sink.toByteArray();
  return ByteBuffer.wrap(encodedBytes);
}
/** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. * * @param newBlock the block to make current */ private void updateCurrentBlock(HFileBlock newBlock) { block = newBlock; // sanity checks if (block.getBlockType() != BlockType.ENCODED_DATA) { throw new IllegalStateException( "EncodedScannerV2 works only on encoded data blocks"); } short dataBlockEncoderId = block.getDataBlockEncodingId(); if (dataBlockEncoder == null || !DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { DataBlockEncoder encoder = DataBlockEncoding.getDataBlockEncoderById(dataBlockEncoderId); setDataBlockEncoder(encoder); } seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); blockFetches++; // Reset the next indexed key this.nextIndexedKey = null; }
/** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. * * @param newBlock the block to make current */ private void updateCurrentBlock(HFileBlock newBlock) { block = newBlock; // sanity checks if (block.getBlockType() != BlockType.ENCODED_DATA) { throw new IllegalStateException( "EncodedScannerV2 works only on encoded data blocks"); } short dataBlockEncoderId = block.getDataBlockEncodingId(); if (dataBlockEncoder == null || !DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { DataBlockEncoder encoder = DataBlockEncoding.getDataBlockEncoderById(dataBlockEncoderId); setDataBlockEncoder(encoder); } seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); blockFetches++; }
/**
 * Creates an encoding context for data blocks.
 * <p>
 * When the configured encoding supplies an encoder, the encoder builds the
 * context; otherwise a default context with no encoding is returned.
 *
 * @param dummyHeader header bytes passed through to the context
 * @param fileContext file context passed through to the context
 * @return the encoder-specific context, or a default one if there is no
 *         encoder
 */
@Override
public HFileBlockEncodingContext newDataBlockEncodingContext(
    byte[] dummyHeader, HFileContext fileContext) {
  final DataBlockEncoder blockEncoder = encoding.getEncoder();
  if (blockEncoder == null) {
    // No encoder for this encoding: fall back to the default context.
    return new HFileBlockDefaultEncodingContext(null, dummyHeader, fileContext);
  }
  return blockEncoder.newDataBlockEncodingContext(encoding, dummyHeader,
      fileContext);
}
/**
 * Creates a decoding context for data blocks.
 * <p>
 * When the configured encoding supplies an encoder, the encoder builds the
 * context; otherwise a default decoding context is returned.
 *
 * @param fileContext file context passed through to the context
 * @return the encoder-specific context, or a default one if there is no
 *         encoder
 */
@Override
public HFileBlockDecodingContext newDataBlockDecodingContext(
    HFileContext fileContext) {
  final DataBlockEncoder blockEncoder = encoding.getEncoder();
  if (blockEncoder == null) {
    // No encoder for this encoding: fall back to the default context.
    return new HFileBlockDefaultDecodingContext(fileContext);
  }
  return blockEncoder.newDataBlockDecodingContext(fileContext);
}
/**
 * Encodes a block of key/value pairs using the encoder of the given
 * algorithm. The encoded output ends up in the encoding context.
 *
 * @param in input data to encode
 * @param algo encoding algorithm
 * @param encodeCtx where the output data will be stored
 * @throws RuntimeException wrapping any {@link IOException} raised by the
 *           encoder
 */
private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
    DataBlockEncoding algo, HFileBlockEncodingContext encodeCtx) {
  final DataBlockEncoder blockEncoder = algo.getEncoder();
  try {
    blockEncoder.encodeKeyValues(in, encodeCtx);
  } catch (IOException ioe) {
    // Surface the failure as unchecked, keeping the original as the cause.
    final String message = String.format(
        "Bug in data block encoder '%s', it probably requested too much "
            + "data, exception message: %s.",
        algo.toString(), ioe.getMessage());
    throw new RuntimeException(message, ioe);
  }
}
/**
 * Encodes a block of key/value pairs using the encoder of the given
 * algorithm. The encoded output ends up in the encoding context.
 *
 * @param in input data to encode
 * @param algo encoding algorithm
 * @param includesMemstoreTS includes memstore timestamp or not
 * @param encodeCtx where the output data will be stored
 * @throws RuntimeException wrapping any {@link IOException} raised by the
 *           encoder
 */
private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
    DataBlockEncoding algo, boolean includesMemstoreTS,
    HFileBlockEncodingContext encodeCtx) {
  final DataBlockEncoder blockEncoder = algo.getEncoder();
  try {
    blockEncoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx);
  } catch (IOException ioe) {
    // Surface the failure as unchecked, keeping the original as the cause.
    final String message = String.format(
        "Bug in data block encoder '%s', it probably requested too much "
            + "data, exception message: %s.",
        algo.toString(), ioe.getMessage());
    throw new RuntimeException(message, ioe);
  }
}
/**
 * Creates an encoding context for data blocks.
 * <p>
 * When the configured encoding supplies an encoder, the encoder builds the
 * context; otherwise a default context with no encoding is returned.
 *
 * @param compressionAlgorithm compression algorithm passed through to the
 *          context
 * @param dummyHeader header bytes passed through to the context
 * @return the encoder-specific context, or a default one if there is no
 *         encoder
 */
@Override
public HFileBlockEncodingContext newDataBlockEncodingContext(
    Algorithm compressionAlgorithm, byte[] dummyHeader) {
  final DataBlockEncoder blockEncoder = encoding.getEncoder();
  if (blockEncoder == null) {
    // No encoder for this encoding: fall back to the default context.
    return new HFileBlockDefaultEncodingContext(
        compressionAlgorithm, null, dummyHeader);
  }
  return blockEncoder.newDataBlockEncodingContext(
      compressionAlgorithm, encoding, dummyHeader);
}
/**
 * Creates a decoding context for data blocks.
 * <p>
 * When the configured encoding supplies an encoder, the encoder builds the
 * context; otherwise a default decoding context is returned.
 *
 * @param compressionAlgorithm compression algorithm passed through to the
 *          context
 * @return the encoder-specific context, or a default one if there is no
 *         encoder
 */
@Override
public HFileBlockDecodingContext newDataBlockDecodingContext(
    Algorithm compressionAlgorithm) {
  final DataBlockEncoder blockEncoder = encoding.getEncoder();
  if (blockEncoder == null) {
    // No encoder for this encoding: fall back to the default context.
    return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
  }
  return blockEncoder.newDataBlockDecodingContext(compressionAlgorithm);
}
/**
 * Creates an encoding context based on the on-disk encoding.
 * <p>
 * The encoder of {@code onDisk} builds the context when both the on-disk
 * encoding and its encoder are present; otherwise a default context with no
 * encoding is returned.
 *
 * @param compressionAlgorithm compression algorithm passed through to the
 *          context
 * @param dummyHeader header bytes passed through to the context
 * @return the encoder-specific context, or a default one if there is no
 *         on-disk encoder
 */
@Override
public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
    Algorithm compressionAlgorithm, byte[] dummyHeader) {
  // Resolve the on-disk encoder, tolerating a missing on-disk encoding.
  final DataBlockEncoder onDiskEncoder =
      (onDisk == null) ? null : onDisk.getEncoder();
  if (onDiskEncoder == null) {
    return new HFileBlockDefaultEncodingContext(compressionAlgorithm, null,
        dummyHeader);
  }
  return onDiskEncoder.newDataBlockEncodingContext(
      compressionAlgorithm, onDisk, dummyHeader);
}
/**
 * Creates a decoding context based on the on-disk encoding.
 * <p>
 * The encoder of {@code onDisk} builds the context when both the on-disk
 * encoding and its encoder are present; otherwise a default decoding context
 * is returned.
 *
 * @param compressionAlgorithm compression algorithm passed through to the
 *          context
 * @return the encoder-specific context, or a default one if there is no
 *         on-disk encoder
 */
@Override
public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
    Algorithm compressionAlgorithm) {
  // Resolve the on-disk encoder, tolerating a missing on-disk encoding.
  final DataBlockEncoder onDiskEncoder =
      (onDisk == null) ? null : onDisk.getEncoder();
  if (onDiskEncoder == null) {
    return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
  }
  return onDiskEncoder.newDataBlockDecodingContext(compressionAlgorithm);
}
/** * Check statistics for given HFile for different data block encoders. * @param scanner Of file which will be compressed. * @param kvLimit Maximal count of KeyValue which will be processed. * @throws IOException thrown if scanner is invalid */ public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException { scanner.seek(KeyValue.LOWESTKEY); KeyValue currentKV; byte[] previousKey = null; byte[] currentKey; DataBlockEncoding[] encodings = DataBlockEncoding.values(); ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream(); int j = 0; while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) { // Iterates through key/value pairs j++; currentKey = currentKV.getKey(); if (previousKey != null) { for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) { totalKeyRedundancyLength++; } } uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength()); previousKey = currentKey; int kLen = currentKV.getKeyLength(); int vLen = currentKV.getValueLength(); int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset()); int restLen = currentKV.getLength() - kLen - vLen; totalKeyLength += kLen; totalValueLength += vLen; totalPrefixLength += restLen; totalCFLength += cfLen; } rawKVs = uncompressedOutputStream.toByteArray(); boolean useTag = (currentKV.getTagsLength() > 0); for (DataBlockEncoding encoding : encodings) { if (encoding == DataBlockEncoding.NONE) { continue; } DataBlockEncoder d = encoding.getEncoder(); HFileContext meta = new HFileContextBuilder() .withCompression(Compression.Algorithm.NONE) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(useTag).build(); codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta )); } }
/**
 * Installs the given data block encoder and replaces the seeker with one
 * created by that encoder, using the reader's comparator and the
 * {@code includesMemstoreTS} flag.
 *
 * @param dataBlockEncoder the encoder to use from now on
 */
private void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) {
  final DataBlockEncoder chosenEncoder = dataBlockEncoder;
  this.dataBlockEncoder = chosenEncoder;
  // The seeker is bound to a specific encoder, so it is rebuilt on every
  // switch.
  seeker = chosenEncoder.createSeeker(reader.getComparator(),
      includesMemstoreTS);
}
/** * Check statistics for given HFile for different data block encoders. * @param scanner Of file which will be compressed. * @param kvLimit Maximal count of KeyValue which will be processed. * @throws IOException thrown if scanner is invalid */ public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException { scanner.seek(KeyValue.LOWESTKEY); KeyValue currentKv; byte[] previousKey = null; byte[] currentKey; List<DataBlockEncoder> dataBlockEncoders = DataBlockEncoding.getAllEncoders(); for (DataBlockEncoder d : dataBlockEncoders) { codecs.add(new EncodedDataBlock(d, includesMemstoreTS)); } int j = 0; while ((currentKv = scanner.next()) != null && j < kvLimit) { // Iterates through key/value pairs j++; currentKey = currentKv.getKey(); if (previousKey != null) { for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) { totalKeyRedundancyLength++; } } for (EncodedDataBlock codec : codecs) { codec.addKv(currentKv); } previousKey = currentKey; totalPrefixLength += currentKv.getLength() - currentKv.getKeyLength() - currentKv.getValueLength(); totalKeyLength += currentKv.getKeyLength(); totalValueLength += currentKv.getValueLength(); } }
/** * Check statistics for given HFile for different data block encoders. * @param scanner Of file which will be compressed. * @param kvLimit Maximal count of KeyValue which will be processed. * @throws IOException thrown if scanner is invalid */ public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException { scanner.seek(KeyValue.LOWESTKEY); KeyValue currentKV; byte[] previousKey = null; byte[] currentKey; DataBlockEncoding[] encodings = DataBlockEncoding.values(); ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream(); int j = 0; while ((currentKV = scanner.next()) != null && j < kvLimit) { // Iterates through key/value pairs j++; currentKey = currentKV.getKey(); if (previousKey != null) { for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) { totalKeyRedundancyLength++; } } uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength()); previousKey = currentKey; int kLen = currentKV.getKeyLength(); int vLen = currentKV.getValueLength(); int cfLen = currentKV.getFamilyLength(currentKV.getFamilyOffset()); int restLen = currentKV.getLength() - kLen - vLen; totalKeyLength += kLen; totalValueLength += vLen; totalPrefixLength += restLen; totalCFLength += cfLen; } rawKVs = uncompressedOutputStream.toByteArray(); boolean useTag = (currentKV.getTagsLength() > 0); for (DataBlockEncoding encoding : encodings) { if (encoding == DataBlockEncoding.NONE) { continue; } DataBlockEncoder d = encoding.getEncoder(); HFileContext meta = new HFileContextBuilder() .withCompression(Compression.Algorithm.NONE) .withIncludesMvcc(includesMemstoreTS) .withIncludesTags(useTag).build(); codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta )); } }
/**
 * Writes one block of test key/values to {@code dos} and records its encoded
 * form.
 * <p>
 * The test key/values are written both to {@code dos} and to an in-memory
 * copy. The copy is then encoded with the encoder of {@code encoding}, or
 * run through a default encoding context when the encoding has no encoder.
 * The encoded size (everything after the dummy header) is appended to
 * {@code encodedSizes}. The encoded data section (additionally skipping the
 * encoding id when an encoder was used) is appended to
 * {@code encodedBlocks}.
 *
 * @param algo compression algorithm put into the file context
 * @param encoding data block encoding to apply
 * @param dos stream that receives the raw test key/values
 * @param encodedSizes receives the encoded size of this block
 * @param encodedBlocks receives the encoded data section of this block
 * @param blockId passed to the test key/value writer
 * @param includesMemstoreTS whether memstore timestamps are included
 * @param dummyHeader header bytes placed in front of the encoded data
 * @param useTag whether tags are included
 * @throws IOException if writing or encoding fails
 */
static void writeEncodedBlock(Algorithm algo, DataBlockEncoding encoding,
    DataOutputStream dos, final List<Integer> encodedSizes,
    final List<ByteBuffer> encodedBlocks, int blockId,
    boolean includesMemstoreTS, byte[] dummyHeader, boolean useTag)
    throws IOException {
  // Tee the generated key/values into dos and into a private copy.
  final ByteArrayOutputStream rawCopy = new ByteArrayOutputStream();
  final DoubleOutputStream tee = new DoubleOutputStream(dos, rawCopy);
  writeTestKeyValues(tee, blockId, includesMemstoreTS, useTag);

  final ByteBuffer rawBuf = ByteBuffer.wrap(rawCopy.toByteArray());
  rawBuf.rewind();

  final DataBlockEncoder blockEncoder = encoding.getEncoder();
  final boolean hasEncoder = blockEncoder != null;
  final int headerLen = dummyHeader.length;

  final HFileContext meta = new HFileContextBuilder()
      .withCompression(algo)
      .withIncludesMvcc(includesMemstoreTS)
      .withIncludesTags(useTag)
      .build();

  final byte[] encodedWithHeader;
  if (hasEncoder) {
    final HFileBlockEncodingContext encodingCtx =
        blockEncoder.newDataBlockEncodingContext(encoding, dummyHeader, meta);
    blockEncoder.encodeKeyValues(rawBuf, encodingCtx);
    encodedWithHeader = encodingCtx.getUncompressedBytesWithHeader();
  } else {
    final HFileBlockDefaultEncodingContext defaultCtx =
        new HFileBlockDefaultEncodingContext(encoding, dummyHeader, meta);
    // Leave headerLen bytes free in front of the raw key/values.
    final byte[] rawBytes = rawBuf.array();
    final byte[] rawWithHeader = new byte[rawBytes.length + headerLen];
    System.arraycopy(rawBytes, 0, rawWithHeader, headerLen, rawBytes.length);
    defaultCtx.compressAfterEncodingWithBlockType(rawWithHeader,
        BlockType.DATA);
    encodedWithHeader = defaultCtx.getUncompressedBytesWithHeader();
  }

  // The reported size counts everything after the dummy header.
  final int encodedSize = encodedWithHeader.length - headerLen;

  // With an encoder, the encoding algorithm ID sits between the block header
  // and the encoded KVs, so the data section starts after it.
  int dataStart = headerLen;
  if (hasEncoder) {
    dataStart += DataBlockEncoding.ID_SIZE;
  }

  final byte[] dataSection = new byte[encodedWithHeader.length - dataStart];
  System.arraycopy(encodedWithHeader, dataStart, dataSection, 0,
      dataSection.length);

  encodedSizes.add(encodedSize);
  encodedBlocks.add(ByteBuffer.wrap(dataSection));
}