Example source code for the Java class org.apache.hadoop.mapred.SequenceFileOutputFormat
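
Most of the snippets below share one pattern with the old org.apache.hadoop.mapred API: build a JobConf, select SequenceFileOutputFormat as the output format, declare the output key/value Writable classes, and set the output path. The following is a minimal, self-contained sketch of that pattern (the class name and paths are placeholders, not taken from any project below; with no mapper set, the old API's identity mapper simply copies the text records into the SequenceFile):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;

public class TextToSequenceFile {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(TextToSequenceFile.class);
    job.setJobName("text-to-sequence-file");

    // Read plain text; the default identity mapper passes (byte offset, line) pairs through.
    job.setInputFormat(TextInputFormat.class);
    job.setNumReduceTasks(0);

    // Write those pairs as a binary SequenceFile instead of text.
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[1]));

    JobClient.runJob(job);
  }
}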

Project: GeoCrawler    File: SegmentReader.java
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(
      getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class<?> keyClass = readers[0].getKeyClass();
  Class<?> valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable) keyClass.newInstance();
  Writable value = (Writable) valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable) valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Project: THUTag    File: ImportDouban.java
@Override
public void run(String[] args) throws Exception {
  Flags flags = new Flags();
  flags.addWithDefaultValue(
      "tag_subject_data", "/media/work/datasets(secret)/douban/raw/tag_subject.dat", "");
  flags.addWithDefaultValue(
      "subject_data", "/media/work/datasets(secret)/douban/raw/subject.dat", "");
  flags.add("output");
  flags.parseAndCheck(args);

  JobConf job = new JobConf(this.getClass());
  job.setJobName("convert-douban-raw-to-posts");
  MapReduceHelper.setAllOutputTypes(job, Text.class);
  MapReduceHelper.setMR(
      job, DoubanRawMapper.class, DoubanToPostReducer.class);
  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("tag_subject_data")));
  TextInputFormat.addInputPath(
      job, new Path(flags.getString("subject_data")));
  job.setOutputFormat(SequenceFileOutputFormat.class);
  SequenceFileOutputFormat.setOutputPath(
      job, new Path(flags.getString("output")));
  JobClient.runJob(job);
}
Project: hadoop-EAR    File: DataFsck.java
private JobConf createJobConf() {
  JobConf jobConf = new JobConf(getConf());
  String jobName = NAME + " " + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);

  jobConf.setJarByClass(DataFsck.class);
  jobConf.setInputFormat(DataFsckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);

  jobConf.setMapperClass(DataFsckMapper.class);
  jobConf.setNumReduceTasks(0);
  return jobConf;
}
Project: hadoop-EAR    File: FastFileCheck.java
private JobConf createJobConf(Configuration conf) {
  JobConf jobConf = new JobConf(conf);
  String jobName = NAME + "_" + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);
  jobConf.setJarByClass(FastFileCheck.class);
  jobConf.setInputFormat(FileCheckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);
  jobConf.setMapperClass(FileCheckMapper.class);
  jobConf.setNumReduceTasks(0);
  jobConf.setBoolean(SOURCE_ONLY_CONF, sourceOnly);

  return jobConf;
}
Project: anthelion    File: SegmentReader.java
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
  SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
  ArrayList<Writable> res = new ArrayList<Writable>();
  Class keyClass = readers[0].getKeyClass();
  Class valueClass = readers[0].getValueClass();
  if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
    throw new IOException("Incompatible key (" + keyClass.getName() + ")");
  Writable aKey = (Writable)keyClass.newInstance();
  Writable value = (Writable)valueClass.newInstance();
  for (int i = 0; i < readers.length; i++) {
    while (readers[i].next(aKey, value)) {
      if (aKey.equals(key)) {
        res.add(value);
        value = (Writable)valueClass.newInstance();
      }
    }
    readers[i].close();
  }
  return res;
}
Project: pss    File: HybridDriver.java
public static void IDMappingJob(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJarByClass(HybridDriver.class);
    job.setJobName("Converting binary similarity scores to text");
    job.setMapperClass(IDMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    Path inputPath = new Path(OUTPUT_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    Path outputPath = new Path("SimilarityScores");
    job.setOutputFormat(TextOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);
    HashPagesDriver.prepareDistribCache(job, HashPagesDriver.IDS_FILE2); //remove not sure
    JobSubmitter.run(job, "BINARY TO TEXT", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
Project: pss    File: SeqWriter.java
/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to hadoop sequence format. It assumes the input is text records
 * of the form [id feature weight ...].
 */
public static void writeSequence() throws IOException {

    JobConf job = new JobConf();
    job.setJobName("Convert text vectors to hadoop seqeunce ");
    job.setJarByClass(SeqWriter.class);

    job.setMapperClass(SeqMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
    FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
    Path outputPath = new Path(OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    JobSubmitter.run(job,"PREPROCESS",-1);
}
Project: learning-spark-examples    File: BasicSaveSequenceFile.java
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new Exception("Usage BasicSaveSequenceFile [sparkMaster] [output]");
    }
    String master = args[0];
    String fileName = args[1];

    JavaSparkContext sc = new JavaSparkContext(
        master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    List<Tuple2<String, Integer>> input = new ArrayList();
    input.add(new Tuple2("coffee", 1));
    input.add(new Tuple2("coffee", 2));
    input.add(new Tuple2("pandas", 3));
    JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(input);
    JavaPairRDD<Text, IntWritable> result = rdd.mapToPair(new ConvertToWritableTypes());
    result.saveAsHadoopFile(fileName, Text.class, IntWritable.class, SequenceFileOutputFormat.class);
}
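
A minimal sketch of reading that file back (not part of the learning-spark-examples snippet above; it reuses the sc and fileName variables from it). JavaSparkContext.sequenceFile returns the raw Writable pairs, which are usually converted to plain Java types right away because Hadoop record readers reuse Writable objects:

JavaPairRDD<Text, IntWritable> loaded =
    sc.sequenceFile(fileName, Text.class, IntWritable.class);
JavaPairRDD<String, Integer> counts = loaded.mapToPair(
    pair -> new Tuple2<>(pair._1().toString(), pair._2().get()));
System.out.println(counts.collect());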
Project: HiBench-CDH5    File: GenKMeansDataset.java
public long produceSamples(Path samplePath) throws Exception {
    Path input = new Path(samplePath.toString() + "-seeds");
    this.numSamples = writeSeeds(input);
    LOG.info("Generating " + this.numSamples + " of samples");

    JobConf jobConf = getJobConf();
    jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));

    FileInputFormat.setInputPaths(jobConf, input);
    FileOutputFormat.setOutputPath(jobConf, samplePath);

    jobConf.setMapperClass(MapClass.class);

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
    jobConf.setNumReduceTasks(0);
    JobClient.runJob(jobConf);

    return this.numSamples;
}
Project: RDFS    File: DataFsck.java
private JobConf createJobConf() {
  JobConf jobConf = new JobConf(getConf());
  String jobName = NAME + " " + dateForm.format(new Date(System.currentTimeMillis()));
  jobConf.setJobName(jobName);
  jobConf.setMapSpeculativeExecution(false);

  jobConf.setJarByClass(DataFsck.class);
  jobConf.setInputFormat(DataFsckInputFormat.class);
  jobConf.setOutputFormat(SequenceFileOutputFormat.class);
  jobConf.setOutputKeyClass(Text.class);
  jobConf.setOutputValueClass(Text.class);

  jobConf.setMapperClass(DataFsckMapper.class);
  jobConf.setNumReduceTasks(0);
  return jobConf;
}
Project: hadoop-consumer    File: KafkaETLJob.java
/**
 * Create a job configuration
 */
@SuppressWarnings("rawtypes")
public static JobConf createJobConf(String name, String topic, Props props, Class classobj) 
throws Exception {
    JobConf conf = getJobConf(name, props, classobj);

    conf.set("topic", topic);

    // input format
    conf.setInputFormat(KafkaETLInputFormat.class);

    //turn off mapper speculative execution
    conf.setMapSpeculativeExecution(false);

    // setup multiple outputs
    MultipleOutputs.addMultiNamedOutput(conf, "offsets", SequenceFileOutputFormat.class, 
                KafkaETLKey.class, BytesWritable.class);


    return conf;
}
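
The addMultiNamedOutput call above only registers the "offsets" output; records still have to be written to it from a task. The following is a hypothetical sketch of how a mapper or reducer typically emits to such a named output with the old-API org.apache.hadoop.mapred.lib.MultipleOutputs (field and method names here are illustrative, not taken from the hadoop-consumer project):

private MultipleOutputs mos;

public void configure(JobConf job) {
  mos = new MultipleOutputs(job);
}

@SuppressWarnings("unchecked")
void emitOffset(KafkaETLKey key, BytesWritable value, Reporter reporter) throws IOException {
  // "offsets" is the named output registered above; the middle argument
  // ("part" here) becomes part of the output file name for multi named outputs.
  mos.getCollector("offsets", "part", reporter).collect(key, value);
}

public void close() throws IOException {
  mos.close();
}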
Project: s3distcp    File: CreateSampleData.java
int runCreateJob(String inputPathString, String outputPathString, String jobName) throws IOException {
    JobConf jobConf = new JobConf(this.conf);
    jobConf.setJobName(jobName);
    jobConf.setMapSpeculativeExecution(false);

    FileInputFormat.addInputPath(jobConf, new Path(inputPathString));
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPathString));

    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(CreateFileInfo.class);
    jobConf.setMapperClass(CreateFileMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    RunningJob result = JobClient.runJob(jobConf);
    return result.isSuccessful() ? 0 : -1;
}
Project: hadoop    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: aliyun-oss-hadoop-fs    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: fst-bench    File: BayesData.java
private void createBayesData() throws IOException, URISyntaxException {

    log.info("creating bayes text data ... ");

    JobConf job = new JobConf();

    Path fout = options.getResultPath();
    Utils.checkHdfsPath(fout);

    String jobname = "Create bayes data";
    job.setJobName(jobname);

    Utils.shareDict(options, job);

    setBayesOptions(job);

    FileInputFormat.setInputPaths(job, dummy.getPath());
    job.setInputFormat(NLineInputFormat.class);

    job.setJarByClass(CreateBayesPages.class);
    job.setMapperClass(CreateBayesPages.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, fout);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    log.info("Running Job: " +jobname);
    log.info("Pages file " + dummy.getPath() + " as input");
    log.info("Rankings file " + fout + " as output");
    JobClient.runJob(job);
    log.info("Finished Running Job: " + jobname);
}
Project: GeoCrawler    File: LinkRank.java
/**
 * Runs the inverter job. The inverter job flips outlinks to inlinks to be
 * passed into the analysis job.
 * 
 * @param nodeDb
 *          The node database to use.
 * @param outlinkDb
 *          The outlink database to use.
 * @param output
 *          The output directory.
 * 
 * @throws IOException
 *           If an error occurs while running the inverter job.
 */
private void runInverter(Path nodeDb, Path outlinkDb, Path output)
    throws IOException {

  // configure the inverter
  JobConf inverter = new NutchJob(getConf());
  inverter.setJobName("LinkAnalysis Inverter");
  FileInputFormat.addInputPath(inverter, nodeDb);
  FileInputFormat.addInputPath(inverter, outlinkDb);
  FileOutputFormat.setOutputPath(inverter, output);
  inverter.setInputFormat(SequenceFileInputFormat.class);
  inverter.setMapperClass(Inverter.class);
  inverter.setReducerClass(Inverter.class);
  inverter.setMapOutputKeyClass(Text.class);
  inverter.setMapOutputValueClass(ObjectWritable.class);
  inverter.setOutputKeyClass(Text.class);
  inverter.setOutputValueClass(LinkDatum.class);
  inverter.setOutputFormat(SequenceFileOutputFormat.class);
  inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
      false);

  // run the inverter job
  LOG.info("Starting inverter job");
  try {
    JobClient.runJob(inverter);
  } catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished inverter job.");
}
Project: FEL    File: ExtractWikipediaAnchorText.java
/**
 * Extracts redirects and the target for each.
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task0(String inputPath, String outputPath) throws IOException {
    LOG.info("Extracting redirects (phase 0)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    conf.setJobName(String.format("ExtractWikipediaAnchorText:phase0[input: %s, output: %s]", inputPath, outputPath));

    conf.setNumReduceTasks(1);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper0.class);
    conf.setReducerClass(IdentityReducer.class);

    JobClient.runJob(conf);
}
Project: FEL    File: ExtractWikipediaAnchorText.java
/**
 * Maps from Wikipedia article to (srcID, (targetID, anchor)).
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task1(String inputPath, String outputPath) throws IOException {
    LOG.info("Extracting anchor text (phase 1)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    conf.setJobName(String.format("ExtractWikipediaAnchorText:phase1[input: %s, output: %s]", inputPath, outputPath));

    // 10 reducers is reasonable.
    conf.setNumReduceTasks(10);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(PairOfStringInt.class);
    conf.setMapOutputValueClass(PairOfStrings.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfStrings.class);

    conf.setMapperClass(MyMapper1.class);
    conf.setReducerClass(MyReducer1.class);
    conf.setPartitionerClass(MyPartitioner1.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);
}
Project: big-c    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: biojava-spark    File: BiojavaSparkUtils.java
/**
 * Write a list of PDB ids to a hadoop sequence file in MMTF format.
 * @param pdbCodeList the input list of PDB ids
 * @param uri the path of the hadoop sequence file to write
 * @param producer the producer name passed to the PdbIdToMmtf conversion
 */
public static void writeToFile(List<String> pdbCodeList, String uri, String producer) {
    JavaSparkContext javaSparkContext = SparkUtils.getSparkContext();
    MmtfUtils.setUpBioJava();
    JavaPairRDD<Text, BytesWritable> distData =
            javaSparkContext.parallelize(pdbCodeList)
            .mapToPair(new PdbIdToMmtf(producer))
            .mapToPair(t -> new Tuple2<String, byte[]>(t._1, WriterUtils.gzipCompress(t._2)))
            .mapToPair(new StringByteToTextByteWriter());
    distData.saveAsHadoopFile(uri, Text.class, BytesWritable.class, SequenceFileOutputFormat.class);
    javaSparkContext.close();
}
Project: THUTag    File: TextMapReduceJobConf.java
public TextMapReduceJobConf() {
    super();
    this.setInputFormat(SequenceFileInputFormat.class);
    this.setOutputFormat(SequenceFileOutputFormat.class);
    this.setMapOutputKeyClass(Text.class);
    this.setMapOutputValueClass(Text.class);
    this.setOutputKeyClass(Text.class);
    this.setOutputValueClass(Text.class);
}
Project: THUTag    File: TextMapReduceJobConf.java
public TextMapReduceJobConf(Class jobClass) {
    super(jobClass);
    this.setInputFormat(SequenceFileInputFormat.class);
    this.setOutputFormat(SequenceFileOutputFormat.class);
    this.setMapOutputKeyClass(Text.class);
    this.setMapOutputValueClass(Text.class);
    this.setOutputKeyClass(Text.class);
    this.setOutputValueClass(Text.class);
}
Project: THUTag    File: MapReduceHelper.java
public static void SetSeqFileInputOutput(JobConf job, String inputPaths, Path output) throws IOException {
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, output);

    // Expand input pattern.
    FileSystem fs = FileSystem.get(job);
    String[] paths = inputPaths.split(",");
    for (String p : paths) {
        int lastslash = p.lastIndexOf("/");
        if (lastslash < 0) {
            p = "./" + p;
            lastslash = 1;
        }
        String parent = p.substring(0, lastslash);
        p = p.substring(lastslash + 1);
        // Each path is treated as a pattern.
        p = p.replace("\\", "\\\\");
        p = p.replace(".", "\\.");
        p = p.replace("*", ".*");
        p = p.replace("?", ".");
        LOG.info("Use pattern:" + p);
        Pattern re = Pattern.compile(p);
        // List all files.
        FileStatus[] files = fs.listStatus(new Path(parent));
        for (FileStatus f : files) {
            if (re.matcher(f.getPath().getName()).matches()) {
                SequenceFileInputFormat.addInputPath(job, f.getPath());
                LOG.info("Adding input:" + f.getPath());
            }
        }
    }
}
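
A hypothetical call site for the helper above (the job class and paths are placeholders, not from THUTag): each comma-separated input entry is treated as a file-name pattern and expanded against its parent directory, and the output is written as a SequenceFile.

JobConf job = new JobConf(MyAnalysisJob.class);  // MyAnalysisJob is a placeholder
MapReduceHelper.SetSeqFileInputOutput(
    job, "/data/run1/part-*,/data/run2/part-*", new Path("/data/merged-output"));
JobClient.runJob(job);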
Project: hadoop-2.6.0-cdh5.4.3    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hadoop-2.6.0-cdh5.4.3    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hadoop-EAR    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hadoop-EAR    File: DataFsck.java
List<SequenceFile.Reader> getOutputs(List<JobContext> submitted) throws IOException {
  List<SequenceFile.Reader> outputs = new ArrayList<SequenceFile.Reader>();
  for (JobContext ctx: submitted) {
    SequenceFile.Reader[] jobOutputs = SequenceFileOutputFormat.getReaders(
      getConf(),
      SequenceFileOutputFormat.getOutputPath(ctx.jobConf));
    for (SequenceFile.Reader r: jobOutputs) {
      outputs.add(r);
    }
  }
  return outputs;
}
Project: Acacia    File: EdgelistPartitioner.java
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    // only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition",
            TextOutputFormat.class, NullWritable.class, Text.class);

    JobClient.runJob(conf);
}
Project: hadoop-plus    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hazelcast-jet    File: WriteHdfsPTest.java
@Parameterized.Parameters(name = "Executing: {0} {1}")
public static Collection<Object[]> parameters() {
    return Arrays.asList(
            new Object[]{TextOutputFormat.class, TextInputFormat.class},
            new Object[]{SequenceFileOutputFormat.class, SequenceFileInputFormat.class}
    );
}
Project: anthelion    File: LinkRank.java
/**
 * Runs the inverter job. The inverter job flips outlinks to inlinks to be
 * passed into the analysis job.
 * 
 * The inverter job takes a link loops database if it exists. It is an
 * optional component of link analysis due to its extreme computational and
 * space requirements, but it can be very useful in weeding out and
 * eliminating link farms and other spam pages.
 * 
 * @param nodeDb The node database to use.
 * @param outlinkDb The outlink database to use.
 * @param loopDb The loop database to use if it exists.
 * @param output The output directory.
 * 
 * @throws IOException If an error occurs while running the inverter job.
 */
private void runInverter(Path nodeDb, Path outlinkDb, Path loopDb, Path output)
  throws IOException {

  // configure the inverter
  JobConf inverter = new NutchJob(getConf());
  inverter.setJobName("LinkAnalysis Inverter");
  FileInputFormat.addInputPath(inverter, nodeDb);
  FileInputFormat.addInputPath(inverter, outlinkDb);

  // add the loop database if it exists, isn't null
  if (loopDb != null) {
    FileInputFormat.addInputPath(inverter, loopDb);
  }
  FileOutputFormat.setOutputPath(inverter, output);
  inverter.setInputFormat(SequenceFileInputFormat.class);
  inverter.setMapperClass(Inverter.class);
  inverter.setReducerClass(Inverter.class);
  inverter.setMapOutputKeyClass(Text.class);
  inverter.setMapOutputValueClass(ObjectWritable.class);
  inverter.setOutputKeyClass(Text.class);
  inverter.setOutputValueClass(LinkDatum.class);
  inverter.setOutputFormat(SequenceFileOutputFormat.class);
  inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

  // run the inverter job
  LOG.info("Starting inverter job");
  try {
    JobClient.runJob(inverter);
  }
  catch (IOException e) {
    LOG.error(StringUtils.stringifyException(e));
    throw e;
  }
  LOG.info("Finished inverter job.");
}
Project: FlexMap    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hops    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: pss    File: LengthSortMain.java
/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on vector lengths.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(LengthSortMain.class.getSimpleName());
    job.setJarByClass(LengthSortMain.class);
    job.setMapperClass(LengthSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(LengthRangePartitioner.class);

    job.setReducerClass(LengthSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Vector Lenghts",-1);
}
Project: pss    File: NormSortMain.java
/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting. Some of the produced partitions might be
 * merged later to reflect the number of partitions chosen by the user.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName("NormSort");
    job.setJarByClass(NormSortMain.class);
    job.setMapperClass(NormSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(NormRangePartitioner.class);

    job.setReducerClass(NormSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job,"Sort By p-norm",-1);
}
Project: pss    File: SigSortMain.java
/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on signatures.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(SigSortMain.class.getSimpleName());
    job.setJarByClass(SigSortMain.class);
    job.setMapperClass(SigSortMapper.class);
    job.setMapOutputKeyClass(BitSignature.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setPartitionerClass(SigRangePartitioner.class);

    job.setReducerClass(SigSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job,"Sort By Signature Bytes",-1);
}
Project: pss    File: MaxwSortMain.java
/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(MaxwRangePartitioner.class);

    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY,
            SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job,"Sort By infinity-Norm",-1);
}
Project: pss    File: SignaturesGenerator.java
public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY,
            ProjectionLshDriver.LSH_NBITS_VALUE);
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath);

    FileInputFormat.setInputPaths(job, INPUT_DIR);
    // Path(INPUT_DIR));
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job,"LSH",-1);
}
Project: hadoop-TCP    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}
Project: hadoop-on-lustre    File: MultipleSequenceFileOutputFormat.java
@Override
protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs,
                                                 JobConf job,
                                                 String name,
                                                 Progressable arg3) 
throws IOException {
  if (theSequenceFileOutputFormat == null) {
    theSequenceFileOutputFormat = new SequenceFileOutputFormat<K,V>();
  }
  return theSequenceFileOutputFormat.getRecordWriter(fs, job, name, arg3);
}