Source code examples for the Java class org.apache.hadoop.mapred.JobClient

Project: hadoop    File: NNBench.java
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
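All of the snippets collected on this page follow the same old-API (org.apache.hadoop.mapred) pattern that NNBench uses above: build a JobConf, point it at input and output paths, wire up the mapper/reducer and key/value classes, then hand it to JobClient.runJob, which submits the job and blocks until it finishes. The sketch below is a minimal, self-contained example of that pattern; the class name and command-line paths are placeholders, not taken from any of the projects listed here.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class MinimalJobClientExample {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(MinimalJobClientExample.class);
    job.setJobName("minimal-jobclient-example");

    // Input and output locations come from the command line (placeholders).
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    // Identity map/reduce simply copies records through.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Submits the job and blocks until completion; throws IOException if it fails.
    RunningJob running = JobClient.runJob(job);
    System.out.println("Job " + running.getID() + " successful: " + running.isSuccessful());
  }
}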
Project: ditb    File: TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
Project: hadoop    File: TestDFSIO.java
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Project: hadoop    File: TestDatamerge.java
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Project: hadoop    File: TestDatamerge.java
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Project: hadoop    File: GenericMRLoadGenerator.java
/**
 * When no input dir is specified, generate random data.
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  Configuration conf = job.getConfiguration();
  final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
  int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  long numBytesToWritePerMap =
    conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }
  long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
       numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}
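To make the sizing arithmetic in confRandom concrete: with the RandomTextWriter defaults of 10 maps per host and 1 GB per map, a cluster reporting 4 task trackers would be asked to write 4 * 10 * 1 GB = 40 GB in total, which works out to 40 maps. The sketch below reproduces just that calculation with assumed numbers (the 4-tracker cluster is illustrative, not from the source).

// Illustration only: the map-sizing arithmetic from confRandom above,
// evaluated with assumed inputs instead of a live ClusterStatus.
public class RandomWriterSizingExample {
  public static void main(String[] args) {
    long bytesPerMap = 1L * 1024 * 1024 * 1024; // 1 GB, the BYTES_PER_MAP default
    int mapsPerHost = 10;                       // the MAPS_PER_HOST default
    int taskTrackers = 4;                       // assumed cluster size

    long totalBytesToWrite = (long) mapsPerHost * bytesPerMap * taskTrackers;
    int numMaps = (int) (totalBytesToWrite / bytesPerMap);
    System.out.println(numMaps + " maps writing " + bytesPerMap + " bytes each"); // 40 maps
  }
}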
Project: hadoop    File: TestMiniMRProxyUser.java
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
Project: hadoop    File: DistCpV1.java
/**
 * Increase the replication factor of _distcp_src_files to
 * sqrt(min(maxMapsOnCluster, numMaps)). This reduces the chance of distcp
 * failing because no replica of _distcp_src_files is available for some
 * maps to read.
 */
private static void setReplication(Configuration conf, JobConf jobConf,
                       Path srcfilelist, int numMaps) throws IOException {
  int numMaxMaps = new JobClient(jobConf).getClusterStatus().getMaxMapTasks();
  short replication = (short) Math.ceil(
                              Math.sqrt(Math.min(numMaxMaps, numMaps)));
  FileSystem fs = srcfilelist.getFileSystem(conf);
  FileStatus srcStatus = fs.getFileStatus(srcfilelist);

  if (srcStatus.getReplication() < replication) {
    if (!fs.setReplication(srcfilelist, replication)) {
      throw new IOException("Unable to increase the replication of file " +
                            srcfilelist);
    }
  }
}
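The target replication in setReplication is ceil(sqrt(min(maxMapsOnCluster, numMaps))), so it grows slowly with the number of maps that will read the file list. For example, with 500 map slots on the cluster and 100 distcp maps it comes out to ceil(sqrt(100)) = 10. The sketch below evaluates just that formula with assumed numbers.

// Illustration only: the replication formula from DistCpV1.setReplication,
// with assumed inputs in place of JobClient#getClusterStatus().
public class DistCpReplicationExample {
  public static void main(String[] args) {
    int numMaxMaps = 500; // assumed cluster-wide map slots
    int numMaps = 100;    // assumed number of distcp map tasks
    short replication = (short) Math.ceil(Math.sqrt(Math.min(numMaxMaps, numMaps)));
    System.out.println("target replication = " + replication); // 10
  }
}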
Project: hadoop    File: GenerateData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
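The loop above assumes task tracker names have the form tracker_<host>:<suffix> and uses the regex to pull out just the host part for each split's preferred location. A quick sketch of that extraction with a made-up tracker name:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustration only: the tracker-name regex from getSplits above,
// applied to an invented tracker string.
public class TrackerNameExample {
  public static void main(String[] args) {
    Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
    Matcher m = trackerPattern.matcher("tracker_node-17.example.com:localhost/127.0.0.1:39051");
    if (m.find()) {
      System.out.println(m.group(1)); // prints node-17.example.com
    }
  }
}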
Project: hadoop    File: GenerateDistCacheData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
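The split-building loop above is a greedy cut: the per-file sizes (the SequenceFile keys) are accumulated until adding the next file would push the running total past targetSize, at which point the region read so far becomes one FileSplit and the overflowing file starts the next one. A toy sketch of that cutting rule over plain numbers (the sizes and target are invented for illustration):

import java.util.ArrayList;
import java.util.List;

// Illustration only: the greedy "cut when the next item would overflow" rule
// used above, applied to plain longs instead of SequenceFile records.
public class GreedySplitExample {
  public static void main(String[] args) {
    long[] fileSizes = {40, 10, 70, 20, 30, 90};  // assumed file sizes
    long targetSize = 100;                        // assumed per-split target

    List<List<Long>> splits = new ArrayList<>();
    List<Long> current = new ArrayList<>();
    long acc = 0L;
    for (long size : fileSizes) {
      if (acc + size > targetSize && acc != 0) {  // same condition as above
        splits.add(current);
        current = new ArrayList<>();
        acc = 0L;
      }
      current.add(size);
      acc += size;
    }
    if (!current.isEmpty()) {
      splits.add(current);
    }
    System.out.println(splits); // [[40, 10], [70, 20], [30], [90]]
  }
}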
Project: hadoop    File: TestCompressionEmulationUtils.java
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);

  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf);

  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);

  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);

  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
Project: hadoop    File: DataJoinJob.java
/**
 * Submit/run a map/reduce job.
 * 
 * @param job the configured job to submit
 * @return true if the job completed successfully
 * @throws IOException on error
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignore and keep polling for completion
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
Project: ditb    File: TestTableMapReduceUtil.java
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
Project: ditb    File: TestTableMapReduceUtil.java
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation()
    throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);

    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}
Project: ditb    File: TestMultiTableSnapshotInputFormat.java
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  JobConf job = new JobConf(TEST_UTIL.getConfiguration());

  job.setJobName(jobName);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);

  TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans), Mapper.class,
      ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true, restoreDir);

  TableMapReduceUtil.addDependencyJars(job);

  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  LOG.info("Started " + job.getJobName());

  RunningJob runningJob = JobClient.runJob(job);
  runningJob.waitForCompletion();
  assertTrue(runningJob.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
Project: aliyun-oss-hadoop-fs    File: TestDFSIO.java
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Project: aliyun-oss-hadoop-fs    File: TestDatamerge.java
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Project: aliyun-oss-hadoop-fs    File: NNBench.java
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Project: emr-dynamodb-connector    File: ReadIopsCalculator.java
public ReadIopsCalculator(JobClient jobClient, DynamoDBClient dynamoDBClient, String tableName,
    int totalSegments, int localSegments) {
  this.jobConf = (JobConf) jobClient.getConf();
  this.jobClient = jobClient;

  this.dynamoDBClient = dynamoDBClient;
  this.tableName = tableName;
  this.totalSegments = totalSegments;
  this.localSegments = localSegments;

  this.throughputPercent = Double.parseDouble(jobConf.get(DynamoDBConstants
      .THROUGHPUT_READ_PERCENT, DynamoDBConstants.DEFAULT_THROUGHPUT_PERCENTAGE));

  log.info("Table name: " + tableName);
  log.info("Throughput percent: " + throughputPercent);
}
Project: aliyun-oss-hadoop-fs    File: TestCompressionEmulationUtils.java
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);

  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf);

  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);

  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);

  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
Project: big-c    File: GenerateData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
Project: THUTag    File: ImportDouban.java
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);

  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
Project: GeoCrawler    File: LinkDbMerger.java
public void merge(Path output, Path[] dbs, boolean normalize, boolean filter)
    throws Exception {
  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
  long start = System.currentTimeMillis();
  LOG.info("LinkDb merge: starting at " + sdf.format(start));

  JobConf job = createMergeJob(getConf(), output, normalize, filter);
  for (int i = 0; i < dbs.length; i++) {
    FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));
  }
  JobClient.runJob(job);
  FileSystem fs = FileSystem.get(getConf());
  fs.mkdirs(output);
  fs.rename(FileOutputFormat.getOutputPath(job), new Path(output,
      LinkDb.CURRENT_NAME));

  long end = System.currentTimeMillis();
  LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: "
      + TimingUtil.elapsedTime(start, end));
}
Project: big-c    File: TestDFSIO.java
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}
Project: bdelab    File: WordCountOldAPI.java
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCountOldAPI.class);
    conf.setJobName("old wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
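The driver above references Map and Reduce classes that are not shown on this page. A typical old-API word-count mapper/reducer pair looks like the sketch below; this is the standard pattern for such a driver, not the bdelab project's actual inner classes.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// Sketch of the standard old-API word-count map/reduce pair such a driver expects.
public class WordCountClasses {
  public static class Map extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value,
        OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
      StringTokenizer tokenizer = new StringTokenizer(value.toString());
      while (tokenizer.hasMoreTokens()) {
        word.set(tokenizer.nextToken());
        output.collect(word, ONE);
      }
    }
  }

  public static class Reduce extends MapReduceBase
      implements Reducer<Text, IntWritable, Text, IntWritable> {
    public void reduce(Text key, Iterator<IntWritable> values,
        OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
  }
}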
Project: big-c    File: TestDatamerge.java
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}
Project: big-c    File: TestCompressionEmulationUtils.java
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);

  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);

  Job job = Job.getInstance(conf);

  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);

  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);

  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}
Project: big-c    File: TestMiniMRProxyUser.java
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}
Project: big-c    File: NNBench.java
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);

  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);

  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);

  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}
Project: hadoop    File: ExternalMapReduce.java
public int run(String[] argv) throws IOException {
  if (argv.length < 2) {
    System.out.println("ExternalMapReduce <input> <output>");
    return -1;
  }
  Path outDir = new Path(argv[1]);
  Path input = new Path(argv[0]);
  JobConf testConf = new JobConf(getConf(), ExternalMapReduce.class);

  //try to load a class from libjar
  try {
    testConf.getClassByName("testjar.ClassWordCount");
  } catch (ClassNotFoundException e) {
    System.out.println("Could not find class from libjar");
    return -1;
  }


  testConf.setJobName("external job");
  FileInputFormat.setInputPaths(testConf, input);
  FileOutputFormat.setOutputPath(testConf, outDir);
  testConf.setMapperClass(MapClass.class);
  testConf.setReducerClass(Reduce.class);
  testConf.setNumReduceTasks(1);
  JobClient.runJob(testConf);
  return 0;
}
Project: hadoop    File: TestKeyFieldBasedComparator.java
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Project: hadoop    File: TestEncryptedShuffle.java
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
Project: hadoop    File: TestMROldApiJobs.java
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);

  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Project: hadoop    File: TestMRCJCSocketFactory.java
private void closeClient(JobClient client) {
  try {
    if (client != null)
      client.close();
  } catch (Exception ignored) {
    // nothing we can do
    ignored.printStackTrace();
  }
}
Project: hadoop    File: Job.java
/**
 * @return the job client of this job
 */
public JobClient getJobClient() {
  try {
    return new JobClient(super.getJob().getConfiguration());
  } catch (IOException ioe) {
    return null;
  }
}
Project: hadoop    File: DistCpV1.java
/**
 * Driver to copy srcPath to destPath depending on required protocol.
 * @param conf configuration
 * @param args arguments
 */
static void copy(final Configuration conf, final Arguments args
    ) throws IOException {
  LOG.info("srcPaths=" + args.srcs);
  if (!args.dryrun || args.flags.contains(Options.UPDATE)) {
    LOG.info("destPath=" + args.dst);
  }

  JobConf job = createJobConf(conf);

  checkSrcPath(job, args.srcs);
  if (args.preservedAttributes != null) {
    job.set(PRESERVE_STATUS_LABEL, args.preservedAttributes);
  }
  if (args.mapredSslConf != null) {
    job.set("dfs.https.client.keystore.resource", args.mapredSslConf);
  }

  //Initialize the mapper
  try {
    if (setup(conf, job, args)) {
      JobClient.runJob(job);
    }
    if(!args.dryrun) {
      finalize(conf, job, args.dst, args.preservedAttributes);
    }
  } finally {
    if (!args.dryrun) {
      //delete tmp
      fullyDelete(job.get(TMP_DIR_LABEL), job);
    }
    //delete jobDirectory
    fullyDelete(job.get(JOB_DIR_LABEL), job);
  }
}
Project: hadoop    File: DistCpV1.java
/**
 * Calculate how many maps to run.
 * The number of maps is the total bytes to copy divided by
 * distcp.bytes.per.map (default BYTES_PER_MAP, overridable with -m on the
 * command line), capped at distcp.max.map.tasks (default
 * MAX_MAPS_PER_NODE * nodes in the cluster) and never less than one.
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure
 * @return Count of maps to run.
 */
private static int setMapCount(long totalBytes, JobConf job) 
    throws IOException {
  int numMaps =
    (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(numMaps, 
      job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
        new JobClient(job).getClusterStatus().getTaskTrackers()));
  numMaps = Math.max(numMaps, 1);
  job.setNumMapTasks(numMaps);
  return numMaps;
}
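Plugging numbers into setMapCount: copying 10 GB with distcp.bytes.per.map left at, say, 256 MB yields 40 maps, which is then capped at MAX_MAPS_PER_NODE times the number of task trackers and floored at one. The sketch below walks through that arithmetic; the 256 MB, 20-maps-per-node, and 5-tracker figures are assumed for illustration, not read from the source.

// Illustration only: the bounds applied in DistCpV1.setMapCount,
// with assumed values for the defaults and the cluster size.
public class DistCpMapCountExample {
  public static void main(String[] args) {
    long totalBytes = 10L * 1024 * 1024 * 1024;   // 10 GB to copy
    long bytesPerMap = 256L * 1024 * 1024;        // assumed distcp.bytes.per.map
    int maxMapsPerNode = 20;                      // assumed per-node cap
    int taskTrackers = 5;                         // assumed cluster size

    int numMaps = (int) (totalBytes / bytesPerMap);             // 40
    numMaps = Math.min(numMaps, maxMapsPerNode * taskTrackers); // min(40, 100) = 40
    numMaps = Math.max(numMaps, 1);                             // at least one map
    System.out.println("numMaps = " + numMaps);
  }
}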
Project: hadoop    File: Statistics.java
public Statistics(
  final Configuration conf, int pollingInterval, CountDownLatch startFlag)
  throws IOException, InterruptedException {
  UserGroupInformation ugi = UserGroupInformation.getLoginUser();
  this.cluster = ugi.doAs(new PrivilegedExceptionAction<JobClient>() {
    public JobClient run() throws IOException {
      return new JobClient(new JobConf(conf));
    }
  });

  this.jtPollingInterval = pollingInterval;
  maxJobCompletedInInterval = conf.getInt(
    MAX_JOBS_COMPLETED_IN_POLL_INTERVAL_KEY, 1);
  this.startFlag = startFlag;
}
Project: hadoop    File: TestGridmixSummary.java
/**
 * Test {@link ClusterSummarizer}.
 */
@Test  (timeout=20000)
public void testClusterSummarizer() throws IOException {
  ClusterSummarizer cs = new ClusterSummarizer();
  Configuration conf = new Configuration();

  String jt = "test-jt:1234";
  String nn = "test-nn:5678";
  conf.set(JTConfig.JT_IPC_ADDRESS, jt);
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, nn);
  cs.start(conf);

  assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo());
  assertEquals("NN name mismatch", nn, cs.getNamenodeInfo());

  ClusterStats cStats = ClusterStats.getClusterStats();
  conf.set(JTConfig.JT_IPC_ADDRESS, "local");
  conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local");
  JobClient jc = new JobClient(conf);
  cStats.setClusterMetric(jc.getClusterStatus());

  cs.update(cStats);

  // test
  assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getMaxReduceTasks());
  assertEquals("Cluster summary test failed!", 1, cs.getNumActiveTrackers());
  assertEquals("Cluster summary test failed!", 0, 
               cs.getNumBlacklistedTrackers());
}
Project: ditb    File: RowCounter.java
public int run(final String[] args) throws Exception {
  // Make sure there are at least 3 parameters
  if (args.length < 3) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    return printUsage();
  }
  JobClient.runJob(createSubmittableJob(args));
  return 0;
}