/**
 * This utility method creates a new Hbase HColumnDescriptor object based on a
 * Thrift ColumnDescriptor "struct".
 *
 * @param in Thrift ColumnDescriptor object
 * @return HColumnDescriptor
 * @throws IllegalArgument if the column name is missing or empty
 */
static public HColumnDescriptor colDescFromThrift(ColumnDescriptor in)
    throws IllegalArgument {
  // Validate the name before touching any other fields: the original parsed
  // the compression and bloom settings first, so a descriptor with a null
  // compression string surfaced an NPE instead of the intended
  // IllegalArgument when the name was also empty/missing.
  if (in.name == null || !in.name.hasRemaining()) {
    throw new IllegalArgument("column name is empty");
  }
  Compression.Algorithm comp =
      Compression.getCompressionAlgorithmByName(in.compression.toLowerCase());
  StoreFile.BloomType bt = BloomType.valueOf(in.bloomFilterType);
  // parseColumn splits "family:qualifier"; element [0] is the family name.
  byte[] parsedName = KeyValue.parseColumn(Bytes.getBytes(in.name))[0];
  HColumnDescriptor col = new HColumnDescriptor(parsedName)
      .setMaxVersions(in.maxVersions)
      .setCompressionType(comp)
      .setInMemory(in.inMemory)
      .setBlockCacheEnabled(in.blockCacheEnabled)
      .setTimeToLive(in.timeToLive)
      .setBloomFilterType(bt);
  return col;
}
/**
 * Checks whether the named compression codec is known and can actually be
 * instantiated (i.e. its native libraries are available).
 *
 * @param codec codec name, matched case-insensitively
 * @return true if the codec is known and usable, false otherwise
 */
public static boolean testCompression(String codec) {
  // Locale.ROOT makes the lowering locale-independent; the default-locale
  // overload mangles names like "GZIP" under Turkish locales (dotless i),
  // which would make a valid codec unrecognizable.
  codec = codec.toLowerCase(java.util.Locale.ROOT);
  Compression.Algorithm a;
  try {
    a = Compression.getCompressionAlgorithmByName(codec);
  } catch (IllegalArgumentException e) {
    LOG.warn("Codec type: " + codec + " is not known");
    return false;
  }
  try {
    testCompression(a);
    return true;
  } catch (IOException ignored) {
    // Best-effort probe: report failure to the caller instead of throwing.
    LOG.warn("Can't instantiate codec: " + codec, ignored);
    return false;
  }
}
public static void testCompression(Compression.Algorithm algo) throws IOException { if (compressionTestResults[algo.ordinal()] != null) { if (compressionTestResults[algo.ordinal()]) { return ; // already passed test, dont do it again. } else { // failed. throw new IOException("Compression algorithm '" + algo.getName() + "'" + " previously failed test."); } } Configuration conf = HBaseConfiguration.create(); try { Compressor c = algo.getCompressor(); algo.returnCompressor(c); compressionTestResults[algo.ordinal()] = true; // passes } catch (Throwable t) { compressionTestResults[algo.ordinal()] = false; // failure throw new IOException(t); } }
/** * Test for * {@link HFileOutputFormat#createFamilyCompressionMap(Configuration)}. Tests * that the compression map is correctly deserialized from configuration * * @throws IOException */ @Test public void testCreateFamilyCompressionMap() throws IOException { for (int numCfs = 0; numCfs <= 3; numCfs++) { Configuration conf = new Configuration(this.util.getConfiguration()); Map<String, Compression.Algorithm> familyToCompression = getMockColumnFamilies(numCfs); HTable table = Mockito.mock(HTable.class); setupMockColumnFamilies(table, familyToCompression); HFileOutputFormat.configureCompression(table, conf); // read back family specific compression setting from the configuration Map<byte[], String> retrievedFamilyToCompressionMap = HFileOutputFormat.createFamilyCompressionMap(conf); // test that we have a value for all column families that matches with the // used mock values for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) { assertEquals("Compression configuration incorrect for column family:" + entry.getKey(), entry.getValue() .getName(), retrievedFamilyToCompressionMap.get(entry.getKey().getBytes())); } } }
/**
 * Parses the column-family related command line options into fields:
 * data block encoding, compression, bloom filter type, and in-memory flag.
 *
 * @param cmd parsed command line
 */
protected void parseColumnFamilyOptions(CommandLine cmd) {
  // Data block encoding: an absent option means "no encoding configured".
  String encodingName = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
  if (encodingName == null) {
    dataBlockEncodingAlgo = null;
  } else {
    dataBlockEncodingAlgo = DataBlockEncoding.valueOf(encodingName);
  }
  if (encodeInCacheOnly && dataBlockEncodingAlgo == DataBlockEncoding.NONE) {
    throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " +
        "does not make sense when data block encoding is not used");
  }

  // Compression defaults to NONE when the option is absent.
  String compressionName = cmd.getOptionValue(OPT_COMPRESSION);
  compressAlgo = (compressionName == null)
      ? Compression.Algorithm.NONE
      : Compression.Algorithm.valueOf(compressionName);

  // Bloom filter type: null means "leave the table default in place".
  String bloomName = cmd.getOptionValue(OPT_BLOOM);
  bloomType = (bloomName == null)
      ? null
      : StoreFile.BloomType.valueOf(bloomName);

  inMemoryCF = cmd.hasOption(OPT_INMEMORY);
}
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter( TaskAttemptContext context) throws IOException { // Get the path of the temporary output file final Path outputPath = FileOutputFormat.getOutputPath(context); final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath(); final Configuration conf = context.getConfiguration(); final FileSystem fs = outputDir.getFileSystem(conf); int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384); // Default to snappy. Compression.Algorithm compressionAlgorithm = getAlgorithm( conf.get(Constants.HFILE_COMPRESSION)); final StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize) .withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId())) .withCompression(compressionAlgorithm) .build(); return new HFileRecordWriter(writer); }
/**
 * This utility method creates a new Hbase HColumnDescriptor object based on a
 * Thrift ColumnDescriptor "struct".
 *
 * @param in Thrift ColumnDescriptor object
 * @return HColumnDescriptor
 * @throws IllegalArgument if the column name is missing or empty
 */
static public HColumnDescriptor colDescFromThrift(ColumnDescriptor in)
    throws IllegalArgument {
  // Validate the name before touching any other fields: the original parsed
  // the compression and bloom settings first, so a descriptor with a null
  // compression string surfaced an NPE instead of the intended
  // IllegalArgument when the name was also empty/missing.
  if (in.name == null || !in.name.hasRemaining()) {
    throw new IllegalArgument("column name is empty");
  }
  Compression.Algorithm comp =
      Compression.getCompressionAlgorithmByName(in.compression.toLowerCase());
  StoreFile.BloomType bt = BloomType.valueOf(in.bloomFilterType);
  // parseColumn splits "family:qualifier"; element [0] is the family name.
  byte[] parsedName = KeyValue.parseColumn(Bytes.getBytes(in.name))[0];
  HColumnDescriptor col = new HColumnDescriptor(parsedName, in.maxVersions,
      comp.getName(), in.inMemory, in.blockCacheEnabled, in.timeToLive,
      bt.toString());
  return col;
}
/**
 * Create a store file writer. Client is responsible for closing file when done.
 * If metadata, add BEFORE closing using appendMetadata()
 * @param fs filesystem to create the store file on
 * @param dir Path to family directory.  Makes the directory if doesn't exist.
 * Creates a file with a unique name in this directory.
 * @param blocksize size to use for the HFile blocks
 * @param algorithm Pass null to get default.
 * @param c Pass null to get default.
 * @param conf HBase system configuration. used with bloom filters
 * @param cacheConf Cache configuration and reference.
 * @param bloomType column family setting for bloom filters
 * @param maxKeyCount estimated maximum number of keys we expect to add
 * @return HFile.Writer
 * @throws IOException if the family directory cannot be created or the
 *         writer cannot be opened
 */
public static StoreFile.Writer createWriter(final FileSystem fs,
    final Path dir, final int blocksize,
    final Compression.Algorithm algorithm, final KeyValue.KVComparator c,
    final Configuration conf, final CacheConfig cacheConf,
    BloomType bloomType, long maxKeyCount) throws IOException {
  // The original ignored mkdirs()'s boolean result, silently deferring a
  // failed directory creation to an obscure error when the writer opened
  // the file; fail fast with a clear message instead.
  if (!fs.exists(dir) && !fs.mkdirs(dir)) {
    throw new IOException("Failed to create directory " + dir);
  }
  Path path = getUniqueFile(fs, dir);
  // Bloom filters may be disabled globally regardless of the family setting.
  if (!BloomFilterFactory.isBloomEnabled(conf)) {
    bloomType = BloomType.NONE;
  }
  return new Writer(fs, path, blocksize,
      algorithm == null ? HFile.DEFAULT_COMPRESSION_ALGORITHM : algorithm,
      conf, cacheConf, c == null ? KeyValue.COMPARATOR : c, bloomType,
      maxKeyCount);
}
/** * Creates an HFile.Writer that also write helpful meta data. * @param fs file system to write to * @param path file name to create * @param blocksize HDFS block size * @param compress HDFS block compression * @param conf user configuration * @param comparator key comparator * @param bloomType bloom filter setting * @param maxKeys the expected maximum number of keys to be added. Was used * for Bloom filter size in {@link HFile} format version 1. * @throws IOException problem writing to FS */ public Writer(FileSystem fs, Path path, int blocksize, Compression.Algorithm compress, final Configuration conf, CacheConfig cacheConf, final KVComparator comparator, BloomType bloomType, long maxKeys) throws IOException { writer = HFile.getWriterFactory(conf, cacheConf).createWriter( fs, path, blocksize, compress, comparator.getRawComparator()); this.kvComparator = comparator; bloomFilterWriter = BloomFilterFactory.createBloomAtWrite(conf, cacheConf, bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); if (bloomFilterWriter != null) { this.bloomType = bloomType; LOG.info("Bloom filter type for " + path + ": " + this.bloomType + ", "+ bloomFilterWriter.getClass().getSimpleName()); } else { // Not using Bloom filters. this.bloomType = BloomType.NONE; } }
/**
 * Parses the column-family related command line options into fields:
 * data block encoding, compression, and bloom filter type.
 *
 * @param cmd parsed command line
 */
private void parseColumnFamilyOptions(CommandLine cmd) {
  // Data block encoding: an absent option means "no encoding configured".
  String encodingName = cmd.getOptionValue(OPT_DATA_BLOCK_ENCODING);
  dataBlockEncodingAlgo = (encodingName == null)
      ? null
      : DataBlockEncoding.valueOf(encodingName);
  if (encodeInCacheOnly && dataBlockEncodingAlgo == DataBlockEncoding.NONE) {
    throw new IllegalArgumentException("-" + OPT_ENCODE_IN_CACHE_ONLY + " " +
        "does not make sense when data block encoding is not used");
  }

  // Compression defaults to NONE when the option is absent.
  String compressionName = cmd.getOptionValue(OPT_COMPRESSION);
  if (compressionName == null) {
    compressAlgo = Compression.Algorithm.NONE;
  } else {
    compressAlgo = Compression.Algorithm.valueOf(compressionName);
  }

  // Bloom filter type: null means "leave the table default in place".
  String bloomName = cmd.getOptionValue(OPT_BLOOM);
  if (bloomName == null) {
    bloomType = null;
  } else {
    bloomType = StoreFile.BloomType.valueOf(bloomName);
  }
}