Java 类org.apache.hadoop.mapred.LocatedFileStatusFetcher 实例源码

项目:hadoop    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  StopWatch sw = new StopWatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}
项目:aliyun-oss-hadoop-fs    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  StopWatch sw = new StopWatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  LOG.info("Total input files to process : " + result.size());
  return result;
}
项目:big-c    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  StopWatch sw = new StopWatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}
项目:hadoop-2.6.0-cdh5.4.3    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  Stopwatch sw = new Stopwatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis());
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}
项目:FlexMap    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  Stopwatch sw = new Stopwatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis());
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}
项目:hops    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  StopWatch sw = new StopWatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  LOG.info("Total input files to process : " + result.size());
  return result;
}
项目:hadoop-on-lustre2    文件:FileInputFormat.java   
/** List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression. 
 * 
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(JobContext job
                                      ) throws IOException {
  Path[] dirs = getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }

  // get tokens for all the required FileSystems..
  TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, 
                                      job.getConfiguration());

  // Whether we need to recursive look into the directory structure
  boolean recursive = getInputDirRecursive(job);

  // creates a MultiPathFilter with the hiddenFileFilter and the
  // user provided one (if any).
  List<PathFilter> filters = new ArrayList<PathFilter>();
  filters.add(hiddenFileFilter);
  PathFilter jobFilter = getInputPathFilter(job);
  if (jobFilter != null) {
    filters.add(jobFilter);
  }
  PathFilter inputFilter = new MultiPathFilter(filters);

  List<FileStatus> result = null;

  int numThreads = job.getConfiguration().getInt(LIST_STATUS_NUM_THREADS,
      DEFAULT_LIST_STATUS_NUM_THREADS);
  Stopwatch sw = new Stopwatch().start();
  if (numThreads == 1) {
    result = singleThreadedListStatus(job, dirs, inputFilter, recursive);
  } else {
    Iterable<FileStatus> locatedFiles = null;
    try {
      LocatedFileStatusFetcher locatedFileStatusFetcher = new LocatedFileStatusFetcher(
          job.getConfiguration(), dirs, recursive, inputFilter, true);
      locatedFiles = locatedFileStatusFetcher.getFileStatuses();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while getting file statuses");
    }
    result = Lists.newArrayList(locatedFiles);
  }

  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time taken to get FileStatuses: " + sw.elapsedMillis());
  }
  LOG.info("Total input paths to process : " + result.size()); 
  return result;
}