protected void setupOutput(final Job job, final AddElementsFromHdfs operation, final HBaseStore store) throws IOException {
    FileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));

    final String stagingDir = operation.getOption(HBaseStoreConstants.OPERATION_HDFS_STAGING_PATH);
    if (null != stagingDir && !stagingDir.isEmpty()) {
        job.getConfiguration().set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, stagingDir);
    }

    try {
        HFileOutputFormat2.configureIncrementalLoad(
                job,
                store.getTable(),
                store.getConnection().getRegionLocator(store.getTableName()));
    } catch (final StoreException e) {
        throw new RuntimeException(e);
    }
}
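// The method above only configures the job to write HFiles; the load step lives elsewhere.
// A minimal sketch of that complementary step (not part of the Gaffer snippet above),
// assuming the job has completed and the store exposes the same table/connection
// accessors used in setupOutput:
LoadIncrementalHFiles loader = new LoadIncrementalHFiles(job.getConfiguration());
loader.doBulkLoad(new Path(operation.getOutputPath()),
        store.getConnection().getAdmin(),
        store.getTable(),
        store.getConnection().getRegionLocator(store.getTableName()));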
public void doBulkLoadSinglePut(boolean verbose, HTable table) throws Exception {
    ClassTools.preLoad(LoadIncrementalHFiles.class);

    // set up the bulk-load temp folder
    HDFSPath bulkLoadPath = new HDFSPath(getConfiguration(), "/tmp/" + UUID.randomUUID().toString());
    if (bulkLoadPath.existsDir()) {
        bulkLoadPath.trash();
    }

    // set up the job
    setMapOutputKeyClass(ImmutableBytesWritable.class);
    setMapOutputValueClass(Put.class);
    HFileOutputFormat2.configureIncrementalLoad(this, table);
    HFileOutputFormat2.setOutputPath(this, bulkLoadPath);

    if (waitForCompletion(verbose)) {
        // load the generated HFiles into the table
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(bulkLoadPath, table);
    } else {
        log.info("loading failed.");
    }
}
@Override
public void init() throws IOException {
    super.init();

    Configuration taskConf = new Configuration();
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());

    ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
    writerContext = new TaskAttemptContextImpl(taskConf,
            new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
                    taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));

    HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
    try {
        writer = hFileOutputFormat2.getRecordWriter(writerContext);

        committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
        workingFilePath = committer.getWorkPath();
    } catch (InterruptedException e) {
        throw new IOException(e.getMessage(), e);
    }

    LOG.info("Created hbase file writer: " + workingFilePath);
}
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D" + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D" + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true, getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
/**
 * Configures the job for bulk load, submits it, waits for completion, and then
 * loads the generated HFiles into the output table.
 * @param job job to configure and run
 * @param outputPath path where the job writes its HFiles
 * @param outputTableName table the HFiles are loaded into
 * @param skipDependencyJars if true, clears the "tmpjars" setting before the job runs
 * @throws Exception if the job fails or the bulk load cannot complete
 */
private void configureRunnableJobUsingBulkLoad(Job job, Path outputPath, TableName outputTableName, boolean skipDependencyJars) throws Exception {
    job.setMapperClass(getBulkMapperClass());
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    final Configuration configuration = job.getConfiguration();
    try (Connection conn = ConnectionFactory.createConnection(configuration);
            Admin admin = conn.getAdmin();
            Table table = conn.getTable(outputTableName);
            RegionLocator regionLocator = conn.getRegionLocator(outputTableName)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
        if (skipDependencyJars) {
            job.getConfiguration().unset("tmpjars");
        }
        boolean status = job.waitForCompletion(true);
        if (!status) {
            LOG.error("IndexTool job failed!");
            throw new Exception("IndexTool job failed: " + job.toString());
        }

        LOG.info("Loading HFiles from {}", outputPath);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
        loader.doBulkLoad(outputPath, admin, table, regionLocator);
    }
    FileSystem.get(configuration).delete(outputPath, true);
}
/**
 * Sets up the actual job.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    String inputDirs = args[0];
    String tabName = args[1];
    conf.setStrings(TABLES_KEY, tabName);
    conf.set(FileInputFormat.INPUT_DIR, inputDirs);
    Job job = Job.getInstance(conf,
            conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
    job.setJarByClass(MapReduceHFileSplitterJob.class);
    job.setInputFormatClass(HFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        LOG.debug("Adding incremental load job: " + hfileOutPath + " from " + inputDirs);
        TableName tableName = TableName.valueOf(tabName);
        job.setMapperClass(HFileCellMapper.class);
        job.setReducerClass(CellSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputValueClass(MapReduceExtendedCell.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
        }
        LOG.debug("Successfully configured the incremental load job");
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
    } else {
        throw new IOException("No bulk output directory specified");
    }
    return job;
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] remainArgs = remainArgs(args, conf);
    options = new LoadVCFToHBaseOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    conf.addResource(new Path(options.getConfig() + "hbase-site.xml"));
    conf.addResource(new Path(options.getConfig() + "core-site.xml"));
    conf.set("vcfHeader", options.getHeaderOutput());

    Job job = Job.getInstance(conf);
    createTable(conf, options.getTableName());

    MultipleVCFHeader vcfHeaders = new MultipleVCFHeader();
    vcfHeaders.mergeHeader(new Path(options.getInput()), options.getHeaderOutput(), job, false);

    job.setJobName("vcf to hbase");
    job.setNumReduceTasks(options.getReducerNumber());
    job.setInputFormatClass(VCFMultipleInputFormat.class);
    job.setJarByClass(LoadVCFToHBase.class);
    job.setMapperClass(VCFToHBaseMapper.class);
    job.setReducerClass(PutSortReducer.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);

    FileInputFormat.setInputPaths(job, new Path(options.getInput()));
    FileOutputFormat.setOutputPath(job, new Path(options.getHFileOutput()));
    HFileOutputFormat2.configureIncrementalLoad(job, new HTable(conf, options.getTableName()));

    if (job.waitForCompletion(true)) {
        LoadHFile2HBase(conf, options.getTableName(), options.getHFileOutput());
        return 0;
    } else {
        return 1;
    }
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] remainArgs = remainArgs(args, conf);
    options = new DBNSFPToHbaseOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    tableName = TableName.valueOf(options.getTableName());
    conf.set("DEFAULT_COLUMN_FAMILY", "data");
    conf.addResource(new Path(options.getConfig() + "hbase-site.xml"));
    conf.addResource(new Path(options.getConfig() + "core-site.xml"));
    conn = ConnectionFactory.createConnection(conf);
    setHeader(new Path(options.getInput()), conf);

    long reduceThreshMem = (long) (1 << 28);
    conf.setLong("putsortreducer.row.threshold", reduceThreshMem);

    Job job = Job.getInstance(conf, "dbNSFPtoHbase");
    createTable(tableName);
    job.setJarByClass(org.bgi.flexlab.gaea.tools.mapreduce.annotator.databaseload.DBNSFPToHbase.class);
    job.setMapperClass(DBNSFPToHbaseMapper.class);
    job.setReducerClass(PutSortReducer.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);

    FileInputFormat.setInputPaths(job, new Path(options.getInput()));
    FileOutputFormat.setOutputPath(job, new Path(options.getHFileOutput()));
    HFileOutputFormat2.configureIncrementalLoad(job, conn.getTable(tableName), conn.getRegionLocator(tableName));

    if (job.waitForCompletion(true)) {
        LoadHFile2HBase(conf, tableName, options.getHFileOutput());
        conn.close();
        return 0;
    } else {
        conn.close();
        return 1;
    }
}
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}
@Test
public void shouldSetupJob() throws IOException, StoreException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));

    final HBaseAddElementsFromHdfsJobFactory factory = new HBaseAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .addInputMapperPair(new Path(inputDir).toString(), TextMapperGeneratorImpl.class.getName())
            .outputPath(outputDir)
            .failurePath(failureDir)
            .jobInitialiser(new TextJobInitialiser())
            .option(HBaseStoreConstants.OPERATION_HDFS_STAGING_PATH, stagingDir)
            .build();

    final HBaseStore store = new SingleUseMiniHBaseStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(getClass()));
    final HBaseProperties properties = HBaseProperties.loadStoreProperties(StreamUtil.storeProps(getClass()));
    store.initialise("graphId", schema, properties);

    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    verify(job).setJarByClass(factory.getClass());
    verify(job).setJobName("Ingest HDFS data: Generator=" + TextMapperGeneratorImpl.class.getName() + ", output=" + outputDir);

    verify(job).setMapperClass(AddElementsFromHdfsMapper.class);
    verify(job).setMapOutputKeyClass(ImmutableBytesWritable.class);
    verify(job).setMapOutputValueClass(KeyValue.class);

    verify(job).setReducerClass(AddElementsFromHdfsReducer.class);
    verify(job).setOutputKeyClass(ImmutableBytesWritable.class);
    verify(job).setOutputValueClass(KeyValue.class);
    verify(job).setOutputFormatClass(HFileOutputFormat2.class);

    assertEquals(fs.makeQualified(new Path(outputDir)).toString(),
            job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));

    verify(job).setNumReduceTasks(1);
}
/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);

    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }

    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}
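// All of the examples above load their HFiles with LoadIncrementalHFiles, which is
// deprecated in HBase 2.x. A minimal sketch of the replacement load step, assuming
// HBase 2.2+ on the classpath (org.apache.hadoop.hbase.tool.BulkLoadHFiles) and a job
// that has already written its HFiles; the table name and path are placeholders:
public static void bulkLoadWithNewApi(Configuration conf) throws IOException {
    // BulkLoadHFiles replaces the deprecated LoadIncrementalHFiles tool
    BulkLoadHFiles.create(conf)
            .bulkLoad(TableName.valueOf("my_table"), new Path("/tmp/hfiles"));
}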