/**
 * Configures {@code job} to run a table mapper over a single table.
 *
 * <p>Sets the input format and mapper class on the job, sets the map output
 * key/value classes when they are non-null, merges a freshly created HBase
 * configuration into the job configuration, and stores the table name and the
 * serialized scan under {@code TableInputFormat.INPUT_TABLE} and
 * {@code TableInputFormat.SCAN}.
 *
 * @param table name of the table to read from
 * @param scan scan to serialize into the job configuration
 * @param mapper mapper class to install on the job
 * @param outputKeyClass map output key class; skipped when {@code null}
 * @param outputValueClass map output value class; skipped when {@code null}
 * @param job job to configure
 * @param addDependencyJars when {@code true}, {@code addDependencyJars(job)} is invoked
 * @param inputFormatClass input format class to install on the job
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);

  Configuration jobConf = job.getConfiguration();
  HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));
  jobConf.set(TableInputFormat.INPUT_TABLE, table);
  jobConf.set(TableInputFormat.SCAN, convertScanToString(scan));

  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
/**
 * Use this before submitting a TableMap job that reads from two tables. It
 * will appropriately set up the job.
 *
 * @param table1 The first table name to read from.
 * @param table2 The second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key; not set when {@code null}.
 * @param outputValueClass The class of the output value; not set when {@code null}.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *     job classes via the distributed cache (tmpjars).
 * @param inputFormatClass The class of the input format.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table1, String table2,
    Scan scan1, Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (outputKeyClass != null) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);

  Configuration jobConf = job.getConfiguration();
  HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));

  // Table names first, then the serialized scans, one entry per input table.
  jobConf.set(TableInputFormat.INPUT_TABLE1, table1);
  jobConf.set(TableInputFormat.INPUT_TABLE2, table2);
  jobConf.set(TableInputFormat.SCAN1, convertScanToString(scan1));
  jobConf.set(TableInputFormat.SCAN2, convertScanToString(scan2));

  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
@SuppressWarnings("rawtypes") private Job startBulkLoad(Configuration conf, String inputTable, String tableName, Class<? extends TableMapper> clazz, Path outputDir) throws Exception { // Create our job to bulk load into HBase Job job = Job.getInstance(conf, "HBase Bulk Loader"); job.setJarByClass(getClass()); // Initialize our mapper by specifying the input table TableMapReduceUtil.initTableMapperJob(inputTable, new Scan(), clazz, ImmutableBytesWritable.class, KeyValue.class, job); HFileOutputFormat.configureIncrementalLoad(job, new HTable(conf, tableName)); HFileOutputFormat.setOutputPath(job, outputDir); // launch the job job.waitForCompletion(true); return job; }
/**
 * Convenience overload that configures a table mapper job with
 * {@code addDependencyJars} set to {@code true}.
 *
 * @param table name of the table to read from
 * @param scan scan instance describing what to read
 * @param mapper mapper class to use
 * @param outputKeyClass class of the map output key
 * @param outputValueClass class of the map output value
 * @param job job to configure
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job) throws IOException {
  final boolean addDependencyJars = true;
  initTableMapperJob(table, scan, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars);
}
/**
 * Convenience overload taking the table name as bytes. The name is converted
 * with {@code Bytes.toString} and the call is forwarded with
 * {@code addDependencyJars} set to {@code true}.
 *
 * @param table binary representation of the table name to read from
 * @param scan scan instance describing what to read
 * @param mapper mapper class to use
 * @param outputKeyClass class of the map output key
 * @param outputValueClass class of the map output value
 * @param job job to configure
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(byte[] table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job) throws IOException {
  final String tableName = Bytes.toString(table);
  final boolean addDependencyJars = true;
  initTableMapperJob(tableName, scan, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars);
}
/**
 * Configures a table mapper job using {@code ThemisTableInputFormat} as the
 * input format; all other arguments are forwarded unchanged.
 *
 * @param table name of the table to read from
 * @param scan scan instance describing what to read
 * @param mapper mapper class to use
 * @param outputKeyClass class of the map output key
 * @param outputValueClass class of the map output value
 * @param job job to configure
 * @param addDependencyJars forwarded to the overload that takes an input format class
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(
    String table,
    Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Job job,
    boolean addDependencyJars) throws IOException {
  initTableMapperJob(
      table,
      scan,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      addDependencyJars,
      ThemisTableInputFormat.class);
}
/**
 * Convenience overload for a multi-scan table mapper job; forwards with
 * {@code addDependencyJars} set to {@code true}.
 *
 * @param scans scans to run
 * @param mapper mapper class to use
 * @param outputKeyClass class of the map output key
 * @param outputValueClass class of the map output value
 * @param job job to configure
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job) throws IOException {
  final boolean addDependencyJars = true;
  initTableMapperJob(scans, mapper, outputKeyClass, outputValueClass, job,
      addDependencyJars);
}
/**
 * Configures {@code job} to run a table mapper over several scans.
 *
 * <p>Installs {@code MultiThemisTableInputFormat} as the input format, sets the
 * mapper and (when non-null) the map output key/value classes, adds the HBase
 * resources to the job configuration, and stores every scan in serialized form
 * under {@code MultiTableInputFormat.SCANS}.
 *
 * @param scans scans to serialize into the job configuration
 * @param mapper mapper class to use
 * @param outputKeyClass map output key class; skipped when {@code null}
 * @param outputValueClass map output value class; skipped when {@code null}
 * @param job job to configure
 * @param addDependencyJars when {@code true}, {@code addDependencyJars(job)} is invoked
 * @throws IOException when setting up the details fails
 */
public static void initTableMapperJob(List<Scan> scans,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars) throws IOException {
  job.setInputFormatClass(MultiThemisTableInputFormat.class);
  if (null != outputValueClass) {
    job.setMapOutputValueClass(outputValueClass);
  }
  if (null != outputKeyClass) {
    job.setMapOutputKeyClass(outputKeyClass);
  }
  job.setMapperClass(mapper);
  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  // Serialize each scan, preserving the order of the incoming list.
  List<String> serializedScans = new ArrayList<String>();
  for (Scan current : scans) {
    String encoded = convertScanToString(current);
    serializedScans.add(encoded);
  }
  String[] scanArray = serializedScans.toArray(new String[0]);
  job.getConfiguration().setStrings(MultiTableInputFormat.SCANS, scanArray);

  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
/** * @param inputTable input table * @param outputPath output path * @param scanner table row scanner object * @param mapper mapper class * @param mapperKey table mapper key * @param mapperValue table mapper value * @param reducer reducer class * @param reducerKey reducer key * @param reducerValue reducer value * @param outputFormat output (file) format * @return a ready to run job - unless you need to assign more job specific settings * @throws IOException */ @SuppressWarnings("rawtypes") public Job prepareTableMapper(String inputTable, Path outputPath, Scan scanner, Class<? extends TableMapper> mapper, Class<? extends WritableComparable> mapperKey, Class<? extends WritableComparable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? extends OutputFormat> outputFormat) throws IOException { setInputTable(inputTable); // Configures the Job & starts it: Configuration conf = getConf(); Job job = new Job(conf); job.setJobName(getCustomJobName(job, mapper, reducer)); // mapper: TableMapReduceUtil.initTableMapperJob(getInputTable(), scanner, mapper, mapperKey, mapperValue, job); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); FileOutputFormat.setOutputPath(job, outputPath); return job; }
/**
 * Maps a mapper type name to the mapper class that implements it.
 *
 * @param type mapper type name; may be {@code null} or empty
 * @return {@code IndexJsonMapper} for {@code Const.MAPPER_TYPE_JSON},
 *     {@code IndexRowkeyMapper} for {@code Const.MAPPER_TYPE_ROWKEY}, and
 *     {@code IndexMapper} for anything else, including {@code null} and the
 *     empty string
 */
@SuppressWarnings("rawtypes")
public static Class<? extends TableMapper> wrap(String type) {
  if (type == null || type.length() == 0) {
    return IndexMapper.class;
  }
  if (type.equals(Const.MAPPER_TYPE_JSON)) {
    return IndexJsonMapper.class;
  }
  if (type.equals(Const.MAPPER_TYPE_ROWKEY)) {
    return IndexRowkeyMapper.class;
  }
  // Unrecognized type names fall back to the default mapper.
  return IndexMapper.class;
}
/**
 * Creates a map driver for the given table mapper and passes the driver's
 * configuration through {@code configure} before returning it.
 *
 * @param tableMapper mapper the driver should exercise
 * @return the driver, after its configuration has been passed to {@code configure}
 */
public TableMapDriver<ImmutableBytesWritable, IntWritable> createMapDriver(
    TableMapper<ImmutableBytesWritable, IntWritable> tableMapper) {
  final TableMapDriver<ImmutableBytesWritable, IntWritable> driver =
      TableMapDriver.newTableMapDriver(tableMapper);
  configure(driver.getConfiguration());
  return driver;
}
/**
 * Use this before submitting a TableMap job that reads from two tables. It
 * will appropriately set up the job, forwarding with
 * {@code addDependencyJars} set to {@code true}.
 *
 * @param table1 The first table name to read from.
 * @param table2 The second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table1, String table2,
    Scan scan1, Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job) throws IOException {
  final boolean addDependencyJars = true;
  initTableMapperJob(table1, table2, scan1, scan2, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars);
}
/**
 * Use this before submitting a TableMap job that reads from two tables given
 * as byte arrays. Both names are converted with {@code Bytes.toString} and the
 * call is forwarded with {@code addDependencyJars} set to {@code true}.
 *
 * @param table1 Binary representation of the first table name to read from.
 * @param table2 Binary representation of the second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(byte[] table1, byte[] table2,
    Scan scan1, Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job) throws IOException {
  final String firstTable = Bytes.toString(table1);
  final String secondTable = Bytes.toString(table2);
  final boolean addDependencyJars = true;
  initTableMapperJob(firstTable, secondTable, scan1, scan2, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars);
}
/**
 * Use this before submitting a TableMap job that reads from two tables given
 * as byte arrays, with an explicit input format. Both names are converted with
 * {@code Bytes.toString}; everything else is forwarded unchanged.
 *
 * @param table1 Binary representation of the first table name to read from.
 * @param table2 Binary representation of the second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *     job classes via the distributed cache (tmpjars).
 * @param inputFormatClass The class of the input format.
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(byte[] table1, byte[] table2,
    Scan scan1, Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars,
    Class<? extends InputFormat> inputFormatClass) throws IOException {
  final String firstTable = Bytes.toString(table1);
  final String secondTable = Bytes.toString(table2);
  initTableMapperJob(firstTable, secondTable, scan1, scan2, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars, inputFormatClass);
}
/**
 * Use this before submitting a TableMap job that reads from two tables given
 * as byte arrays. Both names are converted with {@code Bytes.toString} and
 * {@code TableInputFormat} is used as the input format.
 *
 * @param table1 Binary representation of the first table name to read from.
 * @param table2 Binary representation of the second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *     job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(byte[] table1, byte[] table2,
    Scan scan1, Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars) throws IOException {
  final String firstTable = Bytes.toString(table1);
  final String secondTable = Bytes.toString(table2);
  initTableMapperJob(firstTable, secondTable, scan1, scan2, mapper, outputKeyClass,
      outputValueClass, job, addDependencyJars, TableInputFormat.class);
}
/**
 * Use this before submitting a TableMap job that reads from two tables. It
 * will appropriately set up the job, using {@code TableInputFormat} as the
 * input format.
 *
 * @param table1 The first table name to read from.
 * @param table2 The second table name to read from.
 * @param scan1 The scan instance (columns, time range etc.) for the first table.
 * @param scan2 The scan instance (columns, time range etc.) for the second table.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *     carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *     job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(
    String table1,
    String table2,
    Scan scan1,
    Scan scan2,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Job job,
    boolean addDependencyJars) throws IOException {
  initTableMapperJob(
      table1,
      table2,
      scan1,
      scan2,
      mapper,
      outputKeyClass,
      outputValueClass,
      job,
      addDependencyJars,
      TableInputFormat.class);
}