Example source code for the Java class org.apache.hadoop.mapred.MultiFileSplit

Project: hadoop-EAR    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
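
The constructor above references instance fields (count, paths, offset, currentStream, currentReader) declared on the enclosing MultiFileLineRecordReader class. For context, a sketch of those fields as they appear in Hadoop's MultiFileWordCount example; the individual projects listed here may declare them slightly differently:

// Fields assumed by the constructor above (sketch following the Hadoop
// MultiFileWordCount example; declarations may vary between projects).
private MultiFileSplit split;            // the multi-file split being read
private FileSystem fs;                   // filesystem used to open each path
private Path[] paths;                    // all files contained in the split
private long totLength;                  // combined length of all files
private long offset;                     // bytes consumed so far
private int count = 0;                   // index of the file currently open
private FSDataInputStream currentStream; // raw stream for the current file
private BufferedReader currentReader;    // line reader over currentStream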
Project: solr-hadoop-common    File: WarcFileRecordReader.java
public WarcFileRecordReader(Configuration conf, InputSplit split) throws IOException {
  this.fs = FileSystem.get(conf);
  this.conf = conf;
  if (split instanceof FileSplit) {
    this.filePathList = new Path[1];
    this.filePathList[0] = ((FileSplit) split).getPath();
  } else if (split instanceof MultiFileSplit) {
    this.filePathList = ((MultiFileSplit) split).getPaths();
  } else {
    throw new IOException("InputSplit is not a file split or a multi-file split - aborting");
  }

  // get the total file sizes
  for (int i = 0; i < filePathList.length; i++) {
    totalFileSize += fs.getFileStatus(filePathList[i]).getLen();
  }

  Class<? extends CompressionCodec> codecClass = null;

  try {
    codecClass = conf.getClassByName("org.apache.hadoop.io.compress.GzipCodec")
        .asSubclass(CompressionCodec.class);
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  } catch (ClassNotFoundException cnfEx) {
    compressionCodec = null;
    LOG.info("!!! ClassNotFound Exception thrown setting Gzip codec");
  }

  openNextFile();
}
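
openNextFile() is defined elsewhere in the class. A minimal sketch of what it plausibly does given the constructor above; the field names currentFilePath and currentFile are assumptions for illustration, not verbatim source:

// Sketch (assumed, not verbatim): open the next file in the split and
// wrap it with the gzip codec resolved in the constructor, if present.
private boolean openNextFile() throws IOException {
  if (currentFilePath >= filePathList.length) {
    return false;                        // no files left in this split
  }
  FSDataInputStream fsin = fs.open(filePathList[currentFilePath++]);
  currentFile = (compressionCodec != null)
      ? new DataInputStream(compressionCodec.createInputStream(fsin))
      : new DataInputStream(fsin);
  return true;
}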
Project: wikireverse    File: WarcFileRecordReader.java
public WarcFileRecordReader(Configuration conf, InputSplit split) throws IOException {
  if (split instanceof FileSplit) {
    this.filePathList = new Path[1];
    this.filePathList[0] = ((FileSplit) split).getPath();
  } else if (split instanceof MultiFileSplit) {
    this.filePathList = ((MultiFileSplit) split).getPaths();
  } else {
    throw new IOException("InputSplit is not a file split or a multi-file split - aborting");
  }

  // Use FileSystem.get to open Common Crawl URIs using the S3 protocol.
  URI uri = filePathList[0].toUri();
  this.fs = FileSystem.get(uri, conf);

  // get the total file sizes
  for (int i = 0; i < filePathList.length; i++) {
    totalFileSize += fs.getFileStatus(filePathList[i]).getLen();
  }

  Class<? extends CompressionCodec> codecClass = null;

  try {
    codecClass = conf.getClassByName("org.apache.hadoop.io.compress.GzipCodec")
        .asSubclass(CompressionCodec.class);
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  } catch (ClassNotFoundException cnfEx) {
    compressionCodec = null;
    LOG.info("!!! ClassNotFound Exception thrown setting Gzip codec");
  }

  openNextFile();
}
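
Note the difference from the solr-hadoop-common variant above: FileSystem.get(uri, conf) resolves the filesystem implementation from the path's URI scheme (for example an S3 scheme for Common Crawl buckets), whereas the plain FileSystem.get(conf) call always returns the configured default filesystem and would fail on S3 paths.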
Project: hadoop-on-lustre    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: elasticrawl-examples    File: WarcFileRecordReader.java
public WarcFileRecordReader(Configuration conf, InputSplit split) throws IOException {
  if (split instanceof FileSplit) {
    this.filePathList = new Path[1];
    this.filePathList[0] = ((FileSplit) split).getPath();
  } else if (split instanceof MultiFileSplit) {
    this.filePathList = ((MultiFileSplit) split).getPaths();
  } else {
    throw new IOException("InputSplit is not a file split or a multi-file split - aborting");
  }

  // Use FileSystem.get to open Common Crawl URIs using the S3 protocol.
  URI uri = filePathList[0].toUri();
  this.fs = FileSystem.get(uri, conf);

  // get the total file sizes
  for (int i = 0; i < filePathList.length; i++) {
    totalFileSize += fs.getFileStatus(filePathList[i]).getLen();
  }

  Class<? extends CompressionCodec> codecClass = null;

  try {
    codecClass = conf.getClassByName("org.apache.hadoop.io.compress.GzipCodec")
        .asSubclass(CompressionCodec.class);
    compressionCodec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
  } catch (ClassNotFoundException cnfEx) {
    compressionCodec = null;
    LOG.info("!!! ClassNotFound Exception thrown setting Gzip codec");
  }

  openNextFile();
}
Project: RDFS    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: hadoop-0.20    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: hadoop-book    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
        throws IOException {

    this.split = split;
    fs = FileSystem.get(conf);
    this.paths = split.getPaths();
    this.totLength = split.getLength();
    this.offset = 0;

    // open the first file
    Path file = paths[count];
    currentStream = fs.open(file);
    currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: hortonworks-extension    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: hadoop-gpu    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: logcenter    File: MultiFileWordCount.java
public MultiFileLineRecordReader(Configuration conf, MultiFileSplit split)
  throws IOException {

  this.split = split;
  fs = FileSystem.get(conf);
  this.paths = split.getPaths();
  this.totLength = split.getLength();
  this.offset = 0;

  // open the first file
  Path file = paths[count];
  currentStream = fs.open(file);
  currentReader = new BufferedReader(new InputStreamReader(currentStream));
}
Project: hadoop-EAR    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
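
For context, this getRecordReader override lives on a MultiFileInputFormat subclass that is wired into a job through the old mapred API. A minimal driver sketch, assuming the subclass is named MyInputFormat and the mapper/reducer classes follow Hadoop's MultiFileWordCount example:

// Minimal driver sketch (old mapred API); MyInputFormat, MapClass and the
// output types are assumptions based on the MultiFileWordCount example.
JobConf job = new JobConf(MultiFileWordCount.class);
job.setJobName("MultiFileWordCount");
job.setInputFormat(MyInputFormat.class);     // produces MultiFileSplit instances
job.setMapperClass(MapClass.class);
job.setReducerClass(LongSumReducer.class);   // org.apache.hadoop.mapred.lib
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
JobClient.runJob(job);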
Project: hadoop-on-lustre    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: RDFS    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: hadoop-0.20    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: hadoop-book    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: hortonworks-extension    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: hadoop-gpu    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}
Project: logcenter    File: MultiFileWordCount.java
@Override
public RecordReader<WordOffset, Text> getRecordReader(InputSplit split,
    JobConf job, Reporter reporter) throws IOException {
  return new MultiFileLineRecordReader(job, (MultiFileSplit) split);
}