Java class org.apache.hadoop.mapred.TextInputFormat: example source code

Project: hadoop    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
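Not part of the snippet above: a minimal driver sketch of how these per-path mappings are typically wired into an old-API job. Class names and paths are hypothetical; the MultipleInputs, FileOutputFormat, and JobClient calls are the standard org.apache.hadoop.mapred ones.

// Hypothetical driver: each input path gets its own InputFormat and Mapper,
// while a single reducer consumes the merged map output.
JobConf conf = new JobConf(MyDriver.class);                // MyDriver is hypothetical
conf.setJobName("multiple-inputs-demo");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
conf.setReducerClass(MyReducer.class);                     // hypothetical reducer
MultipleInputs.addInputPath(conf, new Path("/logs/plain"),
    TextInputFormat.class, PlainLogMapper.class);          // hypothetical mapper
MultipleInputs.addInputPath(conf, new Path("/logs/keyed"),
    KeyValueTextInputFormat.class, KeyedLogMapper.class);  // hypothetical mapper
FileOutputFormat.setOutputPath(conf, new Path("/out"));
JobClient.runJob(conf);
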
Project: hadoop    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
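The ignoreKey default computed above (true when the input format is TextInputFormat, so streaming mappers see only the line, not its byte-offset key) can be overridden explicitly. A minimal sketch, assuming a driver that sets the same property this code reads:

// Hypothetical driver fragment: keep the LongWritable offset key in streaming
// even though TextInputFormat is in use.
JobConf job = new JobConf();
job.setInputFormat(TextInputFormat.class);
job.setBoolean("stream.map.input.ignoreKey", false);  // property read in configure() above
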
Project: big-data-benchmark    File: HdfsToMap.java
private static void fillMap(JetInstance client, String name, String inputPath, int parallelism) throws Exception {
    DAG dag = new DAG();
    JobConf conf = new JobConf();
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(inputPath));


    Vertex reader = dag.newVertex("reader", readHdfsP(conf, Util::entry));
    Vertex mapper = dag.newVertex("mapper",
            mapP((Map.Entry<LongWritable, Text> e) -> entry(e.getKey().get(), e.getValue().toString())));
    Vertex writer = dag.newVertex("writer", writeMapP(name));

    reader.localParallelism(parallelism);
    mapper.localParallelism(parallelism);
    writer.localParallelism(parallelism);

    dag.edge(between(reader, mapper));
    dag.edge(between(mapper, writer));


    JobConfig jobConfig = new JobConfig();
    jobConfig.addClass(HdfsToMap.class);

    client.newJob(dag, jobConfig).join();
}
Project: ditb    File: TableMapReduceUtil.java
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
    job,
    // when making changes here, consider also mapreduce.TableMapReduceUtil
    // pull job classes
    job.getMapOutputKeyClass(),
    job.getMapOutputValueClass(),
    job.getOutputKeyClass(),
    job.getOutputValueClass(),
    job.getPartitionerClass(),
    job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
    job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
    job.getCombinerClass());
}
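Not from the ditb sources: a hedged usage sketch showing where addDependencyJars(JobConf) would typically be called, assuming a hypothetical HBase driver class.

// Hypothetical usage: ship HBase and job classes with an old-API job before submission.
JobConf job = new JobConf(MyHBaseDriver.class);  // MyHBaseDriver is hypothetical
// ... set mapper, reducer, and input/output formats here ...
TableMapReduceUtil.addDependencyJars(job);
JobClient.runJob(job);
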
Project: aliyun-oss-hadoop-fs    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: aliyun-oss-hadoop-fs    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: Hadoop-CaseStudies    File: WeatherData.java
public static void main(String[] args) throws Exception {

        JobConf conf = new JobConf(WeatherData.class);
        conf.setJobName("temp");

        // Note: the Mapper's output key/value types are not the defaults,
        // so they must be set explicitly.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);

        conf.setMapperClass(MaxTemperatureMapper.class);
        conf.setReducerClass(MaxTemperatureReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);

    }
Project: big-c    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: big-c    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: THUTag    File: ImportDouban.java
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);

  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
Project: tracing-framework    File: ReadExistingDataJob.java
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(ReadDataJob.TestMapper.class);

    // Make sure this jar is included
    job.setJarByClass(ReadDataJob.TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_path));
}
Project: tracing-framework    File: ReadDataJob.java
public void configure(JobConf job) {
    // Set the mapper and reducers
    job.setMapperClass(TestMapper.class);
    // job.setReducerClass(TestReducer.class);

    // Set the output types of the mapper and reducer
    // job.setMapOutputKeyClass(IntWritable.class);
    // job.setMapOutputValueClass(NullWritable.class);
    // job.setOutputKeyClass(NullWritable.class);
    // job.setOutputValueClass(NullWritable.class);

    // Make sure this jar is included
    job.setJarByClass(TestMapper.class);

    // Specify the input and output data formats
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);

    // Turn off speculative execution
    job.setMapSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    // Add the job input path
    FileInputFormat.addInputPath(job, new Path(this.input_filename));
}
Project: bdelab    File: WordCountOldAPI.java
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCountOldAPI.class);
    conf.setJobName("old wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
Project: hadoop-2.6.0-cdh5.4.3    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: hadoop-2.6.0-cdh5.4.3    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-2.6.0-cdh5.4.3    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: hadoop-EAR    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-EAR    File: TxnGenerator.java
private static JobConf createJobConf(Configuration conf) throws IOException {
  JobConf jobConf = new JobConf(conf);
  String jobName = "transaction_generator";
  jobConf.setJobName(jobName);

  String splitDir = workplace + "split/";

  jobConf.set(TEST_DIR_LABEL, workplace);

  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(TxnGenerator.class);
  jobConf.setMapperClass(GeneratorMapper.class);
  jobConf.setInputFormat(TextInputFormat.class);

  FileInputFormat.addInputPath(jobConf, new Path(splitDir));
  Random random = new Random();
  FileOutputFormat.setOutputPath(jobConf, new Path(workplace, "output" + random.nextLong()));

  jobConf.setNumReduceTasks(0);
  jobConf.setNumMapTasks(numMappers);

  createSplitFiles(conf, new Path(splitDir));

  return jobConf;
}
Project: hadoop-EAR    File: PipeMapper.java
@SuppressWarnings("unchecked")
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean("mapred.skip.on", false);
  String inputFormatClassName = job.getClass("mapred.input.format.class",
      TextInputFormat.class).getCanonicalName();
  ignoreKey = ignoreKey || inputFormatClassName.equals(TextInputFormat.class.getCanonicalName());

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: apex-malhar    File: WordCount.java
public void run(String[] args) throws Exception
{

  JobConf conf = new JobConf(this.getClass());
  conf.setJobName("wordcount");

  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(Map.class);
  conf.setCombinerClass(Reduce.class);
  conf.setReducerClass(Reduce.class);

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(TextOutputFormat.class);

  FileInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));

  JobClient.runJob(conf);
}
Project: hadoop-plus    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: hadoop-plus    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: systemml    File: ReaderTextCSVParallel.java
public CSVReadTask(InputSplit split, SplitOffsetInfos offsets,
        TextInputFormat informat, JobConf job, MatrixBlock dest,
        long rlen, long clen, boolean hasHeader, String delim,
        boolean fill, double fillValue, int splitCount) 
{
    _split = split;
    _splitoffsets = offsets; // new SplitOffsetInfos(offsets);
    _sparse = dest.isInSparseFormat();
    _informat = informat;
    _job = job;
    _dest = dest;
    _rlen = rlen;
    _clen = clen;
    _isFirstSplit = (splitCount == 0);
    _hasHeader = hasHeader;
    _fill = fill;
    _fillValue = fillValue;
    _delim = delim;
    _rc = true;
    _splitCount = splitCount;
}
Project: hops    File: TestMultipleInputs.java
@Test
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: systemml    File: FrameReaderTextCell.java
protected void readTextCellFrameFromHDFS( Path path, JobConf job, FileSystem fs, FrameBlock dest, 
        ValueType[] schema, String[] names, long rlen, long clen)
    throws IOException
{
    if( fs.isDirectory(path) ) {
        FileInputFormat.addInputPath(job, path);
        TextInputFormat informat = new TextInputFormat();
        informat.configure(job);
        InputSplit[] splits = informat.getSplits(job, 1);
        for(InputSplit split: splits)
            readTextCellFrameFromInputSplit(split, informat, job, dest);
    }
    else {
        readRawTextCellFrameFromHDFS(path, job, fs, dest, schema, names, rlen, clen);
    }
}
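For context (not part of the SystemML sources): a minimal sketch of what reading one of those splits looks like with the old-API TextInputFormat, assuming the usual org.apache.hadoop.mapred imports and omitting error handling.

// Minimal sketch: iterate the (byte offset, line) records of a single split.
TextInputFormat informat = new TextInputFormat();
informat.configure(job);
InputSplit[] splits = informat.getSplits(job, 1);
RecordReader<LongWritable, Text> reader =
    informat.getRecordReader(splits[0], job, Reporter.NULL);
LongWritable key = reader.createKey();
Text value = reader.createValue();
while (reader.next(key, value)) {
  // process value.toString() here
}
reader.close();
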
Project: hops    File: PipeMapper.java
public void configure(JobConf job) {
  super.configure(job);
  //disable the auto increment of the counter. For streaming, no of 
  //processed records could be different(equal or less) than the no of 
  //records input.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);
  if (mapInputWriterClass_.getCanonicalName().equals(TextInputWriter.class.getCanonicalName())) {
    String inputFormatClassName = job.getClass("mapred.input.format.class", TextInputFormat.class).getCanonicalName();
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", 
      inputFormatClassName.equals(TextInputFormat.class.getCanonicalName()));
  }

  try {
    mapOutputFieldSeparator = job.get("stream.map.output.field.separator", "\t").getBytes("UTF-8");
    mapInputFieldSeparator = job.get("stream.map.input.field.separator", "\t").getBytes("UTF-8");
    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
Project: HIndex    File: TableMapReduceUtil.java
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
    job,
    // when making changes here, consider also mapreduce.TableMapReduceUtil
    // pull job classes
    job.getMapOutputKeyClass(),
    job.getMapOutputValueClass(),
    job.getOutputKeyClass(),
    job.getOutputValueClass(),
    job.getPartitionerClass(),
    job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
    job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
    job.getCombinerClass());
}
Project: pbase    File: TableMapReduceUtil.java
/**
 * @see org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil#addDependencyJars(org.apache.hadoop.mapreduce.Job)
 */
public static void addDependencyJars(JobConf job) throws IOException {
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addHBaseDependencyJars(job);
  org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(
    job,
    // when making changes here, consider also mapreduce.TableMapReduceUtil
    // pull job classes
    job.getMapOutputKeyClass(),
    job.getMapOutputValueClass(),
    job.getOutputKeyClass(),
    job.getOutputValueClass(),
    job.getPartitionerClass(),
    job.getClass("mapred.input.format.class", TextInputFormat.class, InputFormat.class),
    job.getClass("mapred.output.format.class", TextOutputFormat.class, OutputFormat.class),
    job.getCombinerClass());
}
Project: FlexMap    File: TestMultipleInputs.java
public void testAddInputPathWithMapper() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class,
     MapClass.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
     KeyValueTextInputFormat.class, MapClass2.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  final Map<Path, Class<? extends Mapper>> maps = MultipleInputs
     .getMapperTypeMap(conf);

  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
  assertEquals(MapClass.class, maps.get(new Path("/foo")));
  assertEquals(MapClass2.class, maps.get(new Path("/bar")));
}
Project: presto    File: HiveUtil.java
static InputFormat<?, ?> getInputFormat(Configuration configuration, Properties schema, boolean symlinkTarget)
{
    String inputFormatName = getInputFormatName(schema);
    try {
        JobConf jobConf = new JobConf(configuration);

        Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName);
        if (symlinkTarget && (inputFormatClass == SymlinkTextInputFormat.class)) {
            // symlink targets are always TextInputFormat
            inputFormatClass = TextInputFormat.class;
        }

        return ReflectionUtils.newInstance(inputFormatClass, jobConf);
    }
    catch (ClassNotFoundException | RuntimeException e) {
        throw new RuntimeException("Unable to create input format " + inputFormatName, e);
    }
}
Project: circus-train    File: TestUtils.java
public static Table createUnpartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws TException {
  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(TextInputFormat.class.getName());
  sd.setOutputFormat(TextOutputFormat.class.getName());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
Project: circus-train    File: TestUtils.java
public static Table createPartitionedTable(
    HiveMetaStoreClient metaStoreClient,
    String database,
    String table,
    URI location)
  throws Exception {

  Table hiveTable = new Table();
  hiveTable.setDbName(database);
  hiveTable.setTableName(table);
  hiveTable.setTableType(TableType.EXTERNAL_TABLE.name());
  hiveTable.putToParameters("EXTERNAL", "TRUE");

  hiveTable.setPartitionKeys(PARTITION_COLUMNS);

  StorageDescriptor sd = new StorageDescriptor();
  sd.setCols(DATA_COLUMNS);
  sd.setLocation(location.toString());
  sd.setParameters(new HashMap<String, String>());
  sd.setInputFormat(TextInputFormat.class.getName());
  sd.setOutputFormat(TextOutputFormat.class.getName());
  sd.setSerdeInfo(new SerDeInfo());
  sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde");

  hiveTable.setSd(sd);

  metaStoreClient.createTable(hiveTable);

  ColumnStatisticsDesc statsDesc = new ColumnStatisticsDesc(true, database, table);
  ColumnStatisticsData statsData = new ColumnStatisticsData(_Fields.LONG_STATS, new LongColumnStatsData(1L, 2L));
  ColumnStatisticsObj cso1 = new ColumnStatisticsObj("id", "bigint", statsData);
  List<ColumnStatisticsObj> statsObj = Collections.singletonList(cso1);
  metaStoreClient.updateTableColumnStatistics(new ColumnStatistics(statsDesc, statsObj));

  return hiveTable;
}
Project: hadoop    File: TestMultipleInputs.java
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
      KeyValueTextInputFormat.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
}
Project: hadoop    File: TestKeyFieldBasedComparator.java
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }

  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}
Project: hadoop    File: TestMROldApiJobs.java
static boolean runJob(JobConf conf, Path inDir, Path outDir, int numMaps, 
                         int numReds) throws IOException, InterruptedException {

  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outDir)) {
    fs.delete(outDir, true);
  }
  if (!fs.exists(inDir)) {
    fs.mkdirs(inDir);
  }
  String input = "The quick brown fox\n" + "has many silly\n"
      + "red fox sox\n";
  for (int i = 0; i < numMaps; ++i) {
    DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
    file.writeBytes(input);
    file.close();
  }

  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf, fs);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  JobClient jobClient = new JobClient(conf);

  RunningJob job = jobClient.submitJob(conf);
  return jobClient.monitorAndPrintJob(conf, job);
}
Project: hadoop    File: TestMRAppWithCombiner.java
@Test
public void testCombinerShouldUpdateTheReporter() throws Exception {
  JobConf conf = new JobConf(mrCluster.getConfig());
  int numMaps = 5;
  int numReds = 2;
  Path in = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-in");
  Path out = new Path(mrCluster.getTestWorkDir().getAbsolutePath(),
      "testCombinerShouldUpdateTheReporter-out");
  createInputOutPutFolder(in, out, numMaps);
  conf.setJobName("test-job-with-combiner");
  conf.setMapperClass(IdentityMapper.class);
  conf.setCombinerClass(MyCombinerToCheckReporter.class);
  //conf.setJarByClass(MyCombinerToCheckReporter.class);
  conf.setReducerClass(IdentityReducer.class);
  DistributedCache.addFileToClassPath(TestMRJobs.APP_JAR, conf);
  conf.setOutputCommitter(CustomOutputCommitter.class);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputKeyClass(LongWritable.class);
  conf.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(conf, in);
  FileOutputFormat.setOutputPath(conf, out);
  conf.setNumMapTasks(numMaps);
  conf.setNumReduceTasks(numReds);

  runJob(conf);
}
Project: hadoop    File: PipesNonJavaInputFormat.java
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  // Delegate the generation of input splits to the 'original' InputFormat
  return ReflectionUtils.newInstance(
      job.getClass(Submitter.INPUT_FORMAT, 
                   TextInputFormat.class, 
                   InputFormat.class), job).getSplits(job, numSplits);
}
Project: hadoop    File: FieldSelectionMapReduce.java
public void configure(JobConf job) {
  this.fieldSeparator = job.get(FieldSelectionHelper.DATA_FIELD_SEPERATOR,
      "\t");
  this.mapOutputKeyValueSpec = job.get(
      FieldSelectionHelper.MAP_OUTPUT_KEY_VALUE_SPEC, "0-:");
  this.ignoreInputKey = TextInputFormat.class.getCanonicalName().equals(
      job.getInputFormat().getClass().getCanonicalName());
  this.reduceOutputKeyValueSpec = job.get(
      FieldSelectionHelper.REDUCE_OUTPUT_KEY_VALUE_SPEC, "0-:");
  parseOutputKeyValueSpec();
  LOG.info(specToString());
}
Project: hadoop    File: TestStreamingOutputKeyValueTypes.java
@Test
public void testDefaultToIdentityReducer() throws Exception {
  args.add("-mapper");args.add(map);
  args.add("-jobconf");
  args.add("mapreduce.task.files.preserve.failedtasks=true");
  args.add("-jobconf");
  args.add("stream.tmpdir="+System.getProperty("test.build.data","/tmp"));
  args.add("-inputformat");args.add(TextInputFormat.class.getName());
  super.testCommandLine();
}
Project: aliyun-oss-hadoop-fs    File: TestMultipleInputs.java
public void testAddInputPathWithFormat() {
  final JobConf conf = new JobConf();
  MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
  MultipleInputs.addInputPath(conf, new Path("/bar"),
      KeyValueTextInputFormat.class);
  final Map<Path, InputFormat> inputs = MultipleInputs
     .getInputFormatMap(conf);
  assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
  assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar"))
     .getClass());
}