/**
 * Computes the HDFS block distribution for the given region's HFiles. The
 * weight of each host in the returned distribution is the sum of the block
 * lengths of all of the region's files held on that host, so the top host is
 * the server holding the most bytes of this region's data.
 *
 * @param region region whose block distribution is wanted
 * @return block distribution for the region; an empty distribution when the
 *         table descriptor cannot be found or an IO error occurs
 */
protected HDFSBlocksDistribution internalGetTopBlockLocation(HRegionInfo region) {
  try {
    HTableDescriptor descriptor = getTableDescriptor(region.getTable());
    if (descriptor == null) {
      // No descriptor available — nothing to compute for this region.
      return new HDFSBlocksDistribution();
    }
    return HRegion.computeHDFSBlocksDistribution(getConf(), descriptor, region);
  } catch (IOException ioe) {
    LOG.warn("IOException during HDFSBlocksDistribution computation. for "
        + "region = " + region.getEncodedName(), ioe);
    // Degrade gracefully: callers always receive a usable (if empty) distribution.
    return new HDFSBlocksDistribution();
  }
}
/**
 * Returns the HDFS block distribution for the given region, serving it from
 * the cache when present and computing + caching it on a miss.
 *
 * <p>NOTE(review): the containsKey/get pair is not atomic — the
 * ExecutionException handling suggests {@code cache} is a loading cache whose
 * get() can run a loader; presumably the containsKey check exists to avoid
 * triggering that loader on a miss. Confirm against the cache declaration.
 *
 * @param hri the region to look up
 * @return block distribution for {@code hri}; never null
 */
public HDFSBlocksDistribution getBlockDistribution(HRegionInfo hri) {
  HDFSBlocksDistribution blockDistbn = null;
  try {
    if (cache.asMap().containsKey(hri)) {
      // Fast path: entry already present in the cache.
      blockDistbn = cache.get(hri);
      return blockDistbn;
    } else {
      LOG.debug("HDFSBlocksDistribution not found in cache for region "
          + hri.getRegionNameAsString());
      // Miss: compute the distribution directly and populate the cache.
      blockDistbn = internalGetTopBlockLocation(hri);
      cache.put(hri, blockDistbn);
      return blockDistbn;
    }
  } catch (ExecutionException e) {
    // cache.get() failed; fall back to a direct computation and re-cache it.
    LOG.warn("Error while fetching cache entry ", e);
    blockDistbn = internalGetTopBlockLocation(hri);
    cache.put(hri, blockDistbn);
    return blockDistbn;
  }
}
/**
 * Fills localityPerServer[i] with the HDFS block locality index of server i,
 * aggregated over the block distributions of every region currently assigned
 * to that server. Does nothing (beyond a warning) when no region location
 * finder is available.
 */
void calculateRegionServerLocalities() {
  if (regionFinder == null) {
    LOG.warn("Region location finder found null, skipping locality calculations.");
    return;
  }
  for (int serverIdx = 0; serverIdx < regionsPerServer.length; serverIdx++) {
    HDFSBlocksDistribution serverBlocks = new HDFSBlocksDistribution();
    int[] assigned = regionsPerServer[serverIdx];
    if (assigned.length == 0) {
      LOG.debug("Server " + servers[serverIdx].getHostname() + " had 0 regions.");
    } else {
      // Sum the block distributions of all regions hosted on this server.
      for (int regionIdx : assigned) {
        serverBlocks.add(regionFinder.getBlockDistribution(regions[regionIdx]));
      }
    }
    localityPerServer[serverIdx] =
        serverBlocks.getBlockLocalityIndex(servers[serverIdx].getHostname());
  }
}
/**
 * Returns the top hosts holding blocks of the store files under the given
 * path, ordered by total bytes held; used by the Split.
 *
 * @param fs filesystem holding the store directory
 * @param path store directory to scan
 * @return top hosts, best first; empty array when the directory has no files
 * @throws IOException on filesystem errors
 */
private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
    throws IOException {
  final FileStatus[] files = FSUtils.listStatus(fs, path);
  if (files == null) {
    return new String[] {};
  }
  // Accumulate block locations over every HFile in the store directory.
  HDFSBlocksDistribution combined = new HDFSBlocksDistribution();
  for (FileStatus status : files) {
    combined.add(FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()));
  }
  List<String> topHosts = combined.getTopHosts();
  return topHosts.toArray(new String[topHosts.size()]);
}
/**
 * This is a helper function to compute HDFS block distribution on demand
 *
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region to compute the distribution for
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException if the region filesystem cannot be read
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family.getNameAsString());
    if (storeFiles == null) continue;
    for (StoreFileInfo storeFileInfo : storeFiles) {
      try {
        hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
      } catch (IOException ioe) {
        // Deliberate best-effort: a single unreadable store file should not
        // fail the whole region's distribution; log and keep going.
        LOG.warn("Error getting hdfs block distribution for " + storeFileInfo);
      }
    }
  }
  return hdfsBlocksDistribution;
}
/**
 * Helper to estimate the HDFS block distribution of a reference file. The
 * exact value is not computed: the bottom half-file is assumed to cover the
 * first half of the referenced HFile and the top half-file the second half.
 * This is only an estimate (the region midkey is not the HFile midkey, and
 * key sizes vary); it can be refined later if needed.
 *
 * @param fs The FileSystem
 * @param reference The reference
 * @param status The reference FileStatus
 * @return HDFS blocks distribution, or null when {@code status} is null
 */
private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs,
    final Reference reference, final FileStatus status) throws IOException {
  if (status == null) {
    return null;
  }
  final long fileLen = status.getLen();
  final boolean isTop = Reference.isTopFileRegion(reference.getFileRegion());
  // Top half starts at the midpoint; bottom half starts at 0. The lengths
  // are complementary so together they cover the whole file exactly.
  final long start = isTop ? fileLen / 2 : 0;
  final long length = isTop ? fileLen - fileLen / 2 : fileLen / 2;
  return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
}
/** * Compute the HDFS Block Distribution for this StoreFile */ public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) throws IOException { // guard against the case where we get the FileStatus from link, but by the time we // call compute the file is moved again if (this.link != null) { FileNotFoundException exToThrow = null; for (int i = 0; i < this.link.getLocations().length; i++) { try { return computeHDFSBlocksDistributionInternal(fs); } catch (FileNotFoundException ex) { // try the other location exToThrow = ex; } } throw exToThrow; } else { return computeHDFSBlocksDistributionInternal(fs); } }
/**
 * Verifies that the block distribution computed via RegionLocationFinder
 * matches the one each online region reports about itself: same total unique
 * block weight and, when there is any data, the same top host.
 */
@Test
public void testInternalGetTopBlockLocation() throws Exception {
  for (int i = 0; i < ServerNum; i++) {
    HRegionServer server = cluster.getRegionServer(i);
    for (Region region : server.getOnlineRegions(tableName)) {
      // get region's hdfs block distribution by region and RegionLocationFinder,
      // they should have same result
      HDFSBlocksDistribution blocksDistribution1 = region.getHDFSBlocksDistribution();
      HDFSBlocksDistribution blocksDistribution2 = finder.getBlockDistribution(region
          .getRegionInfo());
      assertEquals(blocksDistribution1.getUniqueBlocksTotalWeight(),
          blocksDistribution2.getUniqueBlocksTotalWeight());
      // Only compare top hosts when the region actually holds data; with zero
      // weight the host ordering is meaningless.
      if (blocksDistribution1.getUniqueBlocksTotalWeight() != 0) {
        assertEquals(blocksDistribution1.getTopHosts().get(0), blocksDistribution2.getTopHosts()
            .get(0));
      }
    }
  }
}
/**
 * Returns an ordered list of hosts that are hosting the blocks for this
 * region. The weight of each host is the sum of the block lengths of all
 * files on that host, so the first host in the list is the server which
 * holds the most bytes of the given region's HFiles.
 *
 * @param fs the filesystem
 * @param region region
 * @return ordered list of servers holding blocks of the specified region,
 *         or null when the descriptor is missing or an IO error occurs
 */
@SuppressWarnings("unused")
private List<ServerName> getTopBlockLocations(FileSystem fs, HRegionInfo region) {
  // Stays null on any failure path below — callers must handle a null return.
  List<ServerName> topServerNames = null;
  try {
    HTableDescriptor tableDescriptor = getTableDescriptor(
        region.getTableName());
    if (tableDescriptor != null) {
      HDFSBlocksDistribution blocksDistribution =
          HRegion.computeHDFSBlocksDistribution(config, tableDescriptor,
              region.getEncodedName());
      // Map raw hostnames to the live server names known to the master.
      List<String> topHosts = blocksDistribution.getTopHosts();
      topServerNames = mapHostNameToServerName(topHosts);
    }
  } catch (IOException ioe) {
    LOG.debug("IOException during HDFSBlocksDistribution computation. for "
        + "region = " + region.getEncodedName() , ioe);
  }
  return topServerNames;
}
/**
 * Returns the hosts holding the most bytes of the store files under the
 * given directory, best host first; used by the Split.
 *
 * @param fs filesystem to scan
 * @param path store directory
 * @return ordered top hosts; empty array when no files exist
 * @throws IOException on filesystem errors
 */
private static String[] getStoreDirHosts(final FileSystem fs, final Path path)
    throws IOException {
  FileStatus[] storeFiles = FSUtils.listStatus(fs, path, null);
  if (storeFiles == null) {
    return new String[] {};
  }
  HDFSBlocksDistribution aggregate = new HDFSBlocksDistribution();
  // Fold every HFile's block locations into one distribution.
  for (FileStatus fileStatus : storeFiles) {
    HDFSBlocksDistribution perFile =
        FSUtils.computeHDFSBlocksDistribution(fs, fileStatus, 0, fileStatus.getLen());
    aggregate.add(perFile);
  }
  List<String> rankedHosts = aggregate.getTopHosts();
  return rankedHosts.toArray(new String[rankedHosts.size()]);
}
/**
 * Estimates the HDFS block distribution of a reference file. No exact value
 * is computed; the bottom reference is assumed to span the first half of the
 * referenced file and the top reference the second half. Since the region
 * midkey rarely equals the HFile midkey and key sizes vary, this is only an
 * estimate and can be improved later if needed.
 *
 * @param fs The FileSystem
 * @param reference The reference
 * @param status The reference FileStatus
 * @return HDFS blocks distribution, or null when {@code status} is null
 */
static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(FileSystem fs,
    Reference reference, FileStatus status) throws IOException {
  if (status == null) {
    return null;
  }
  final long len = status.getLen();
  long start;
  long length;
  if (Reference.isTopFileRegion(reference.getFileRegion())) {
    // Top half: from the midpoint to the end of the file.
    start = len / 2;
    length = len - len / 2;
  } else {
    // Bottom half: from the beginning to the midpoint.
    start = 0;
    length = len / 2;
  }
  return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
}
/**
 * Helper to compute the HDFS block distribution of a region on demand by
 * scanning every store directory under the table path.
 *
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionEncodedName encoded name of the region
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException on filesystem errors
 */
static public HDFSBlocksDistribution computeHDFSBlocksDistribution(Configuration conf,
    HTableDescriptor tableDescriptor, String regionEncodedName) throws IOException {
  HDFSBlocksDistribution result = new HDFSBlocksDistribution();
  Path tablePath = FSUtils.getTablePath(FSUtils.getRootDir(conf), tableDescriptor.getName());
  FileSystem fs = tablePath.getFileSystem(conf);
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    Path storeHomeDir = Store.getStoreHomedir(tablePath, regionEncodedName, family.getName());
    if (!fs.exists(storeHomeDir)) {
      // Family has no store directory for this region yet; skip it.
      continue;
    }
    // Fold in the block locations of each HFile in the store.
    for (FileStatus hfile : fs.listStatus(storeHomeDir)) {
      result.add(FSUtils.computeHDFSBlocksDistribution(fs, hfile, 0, hfile.getLen()));
    }
  }
  return result;
}
/**
 * Returns an ordered list of hosts that are hosting the blocks for this
 * region. The weight of each host is the sum of the block lengths of all
 * files on that host, so the first host in the list is the server which holds
 * the most bytes of the given region's HFiles.
 *
 * @param region region
 * @return ordered list of servers holding blocks of the specified region,
 *         or null when the descriptor is missing or an IO error occurs
 */
protected List<ServerName> internalGetTopBlockLocation(HRegionInfo region) {
  // Stays null on any failure path — callers must tolerate a null return.
  List<ServerName> topServerNames = null;
  try {
    HTableDescriptor tableDescriptor = getTableDescriptor(region.getTable());
    if (tableDescriptor != null) {
      HDFSBlocksDistribution blocksDistribution =
          HRegion.computeHDFSBlocksDistribution(getConf(), tableDescriptor, region);
      // Convert raw hostnames to known server names.
      List<String> topHosts = blocksDistribution.getTopHosts();
      topServerNames = mapHostNameToServerName(topHosts);
    }
  } catch (IOException ioe) {
    LOG.debug("IOException during HDFSBlocksDistribution computation. for "
        + "region = " + region.getEncodedName(), ioe);
  }
  return topServerNames;
}
/**
 * Helper to compute the HDFS block distribution of a region on demand,
 * aggregating the distributions of every store file in every column family.
 *
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region to compute the distribution for
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException on filesystem errors
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family.getNameAsString());
    if (storeFiles != null) {
      // Sum the per-file distributions into the region-wide result.
      for (StoreFileInfo info : storeFiles) {
        distribution.add(info.computeHDFSBlocksDistribution(fs));
      }
    }
  }
  return distribution;
}
/** * Compute the HDFS Block Distribution for this StoreFile */ public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) throws IOException { // guard agains the case where we get the FileStatus from link, but by the time we // call compute the file is moved again if (this.link != null) { FileNotFoundException exToThrow = null; for (int i = 0; i < this.link.getLocations().length; i++) { try { return computeHDFSBlocksDistributionInternal(fs); } catch (FileNotFoundException ex) { // try the other location exToThrow = ex; } } throw exToThrow; } else { return computeHDFSBlocksDistributionInternal(fs); } }
/**
 * Helper function to compute the HDFS block distribution of a given reference
 * file. For a reference file we don't compute the exact value; we use an
 * estimate instead, assuming the bottom part takes the first half of the
 * referenced file and the top part the second half. This is only an estimate
 * (the region midkey is not the HFile midkey, and the number and size of keys
 * vary); it can be improved later if the estimate proves insufficient.
 *
 * @param fs The FileSystem
 * @param reference The reference
 * @param status The reference FileStatus
 * @return HDFS blocks distribution, or null when {@code status} is null
 */
private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
    final FileSystem fs, final Reference reference, final FileStatus status)
    throws IOException {
  if (status == null) {
    return null;
  }
  long start = 0;
  long length = 0;
  if (Reference.isTopFileRegion(reference.getFileRegion())) {
    // Top half: midpoint to end of file (lengths are complementary, so
    // bottom + top cover the whole file even for odd lengths).
    start = status.getLen()/2;
    length = status.getLen() - status.getLen()/2;
  } else {
    // Bottom half: start of file to midpoint.
    start = 0;
    length = status.getLen()/2;
  }
  return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
}
/**
 * This is a helper function to compute HDFS block distribution on demand
 *
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region to compute the distribution for
 * @param tablePath the table directory
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException if the region filesystem cannot be read
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo, Path tablePath)
    throws IOException {
  HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution();
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  for (HColumnDescriptor family: tableDescriptor.getFamilies()) {
    Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family.getNameAsString());
    // Family with no store files contributes nothing.
    if (storeFiles == null) continue;
    // Aggregate every store file's block locations into the region total.
    for (StoreFileInfo storeFileInfo : storeFiles) {
      hdfsBlocksDistribution.add(storeFileInfo.computeHDFSBlocksDistribution(fs));
    }
  }
  return hdfsBlocksDistribution;
}
/** * Compute the HDFS Block Distribution for this StoreFile */ public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) throws IOException { FileStatus status; if (this.reference != null) { if (this.link != null) { // HFileLink Reference status = link.getFileStatus(fs); } else { // HFile Reference Path referencePath = getReferredToFile(this.getPath()); status = fs.getFileStatus(referencePath); } return computeRefFileHDFSBlockDistribution(fs, reference, status); } else { if (this.link != null) { // HFileLink status = link.getFileStatus(fs); } else { status = this.fileStatus; } return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()); } }
/**
 * Helper function to compute the HDFS block distribution of a given reference
 * file. For a reference file we don't compute the exact value; we use an
 * estimate instead, assuming the bottom part takes the first half of the
 * referenced file and the top part the second half. This is only an estimate
 * (the region midkey is not the HFile midkey, and the number and size of keys
 * vary); it can be improved later if the estimate proves insufficient.
 *
 * @param fs The FileSystem
 * @param reference The reference
 * @param status The reference FileStatus
 * @return HDFS blocks distribution, or null when {@code status} is null
 */
static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
    FileSystem fs, Reference reference, FileStatus status) throws IOException {
  if (status == null) {
    return null;
  }
  long start = 0;
  long length = 0;
  if (Reference.isTopFileRegion(reference.getFileRegion())) {
    // Top half: midpoint to end; lengths complement the bottom half exactly.
    start = status.getLen()/2;
    length = status.getLen() - status.getLen()/2;
  } else {
    // Bottom half: start to midpoint.
    start = 0;
    length = status.getLen()/2;
  }
  return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
}
/**
 * Computes the HDFS block distribution for the given region's HFiles. The
 * weight of each host in the returned distribution is the sum of the block
 * lengths of all of the region's files held on that host, so the top host is
 * the server holding the most bytes of this region's data.
 *
 * @param region region whose block distribution is wanted
 * @return block distribution for the region; the shared empty distribution
 *         when the table descriptor is missing or an IO error occurs
 */
protected HDFSBlocksDistribution internalGetTopBlockLocation(RegionInfo region) {
  try {
    TableDescriptor descriptor = getTableDescriptor(region.getTable());
    if (descriptor == null) {
      // No descriptor — nothing to compute for this region.
      return EMPTY_BLOCK_DISTRIBUTION;
    }
    return HRegion.computeHDFSBlocksDistribution(getConf(), descriptor, region);
  } catch (IOException ioe) {
    LOG.warn("IOException during HDFSBlocksDistribution computation. for "
        + "region = " + region.getEncodedName(), ioe);
    // Degrade gracefully: callers always receive a usable distribution.
    return EMPTY_BLOCK_DISTRIBUTION;
  }
}
/**
 * Returns the HDFS block distribution for the given region, serving it from
 * the cache when present and computing + caching it on a miss.
 *
 * <p>NOTE(review): the containsKey/get pair is not atomic — the
 * ExecutionException handling suggests {@code cache} is a loading cache whose
 * get() can run a loader; presumably the containsKey check exists to avoid
 * triggering that loader on a miss. Confirm against the cache declaration.
 *
 * @param hri the region to look up
 * @return block distribution for {@code hri}; never null
 */
public HDFSBlocksDistribution getBlockDistribution(RegionInfo hri) {
  HDFSBlocksDistribution blockDistbn = null;
  try {
    if (cache.asMap().containsKey(hri)) {
      // Fast path: entry already present in the cache.
      blockDistbn = cache.get(hri);
      return blockDistbn;
    } else {
      LOG.debug("HDFSBlocksDistribution not found in cache for region "
          + hri.getRegionNameAsString());
      // Miss: compute the distribution directly and populate the cache.
      blockDistbn = internalGetTopBlockLocation(hri);
      cache.put(hri, blockDistbn);
      return blockDistbn;
    }
  } catch (ExecutionException e) {
    // cache.get() failed; fall back to a direct computation and re-cache it.
    LOG.warn("Error while fetching cache entry ", e);
    blockDistbn = internalGetTopBlockLocation(hri);
    cache.put(hri, blockDistbn);
    return blockDistbn;
  }
}
/**
 * Verifies that the block distribution computed via RegionLocationFinder
 * matches the one each region reports about itself: same total unique block
 * weight and, when there is any data, the same top host.
 */
@Test
public void testInternalGetTopBlockLocation() throws Exception {
  for (int i = 0; i < ServerNum; i++) {
    HRegionServer server = cluster.getRegionServer(i);
    for (HRegion region : server.getRegions(tableName)) {
      // get region's hdfs block distribution by region and RegionLocationFinder,
      // they should have same result
      HDFSBlocksDistribution blocksDistribution1 = region.getHDFSBlocksDistribution();
      HDFSBlocksDistribution blocksDistribution2 = finder.getBlockDistribution(region
          .getRegionInfo());
      assertEquals(blocksDistribution1.getUniqueBlocksTotalWeight(),
          blocksDistribution2.getUniqueBlocksTotalWeight());
      // Only compare top hosts when the region actually holds data; with zero
      // weight the host ordering is meaningless.
      if (blocksDistribution1.getUniqueBlocksTotalWeight() != 0) {
        assertEquals(blocksDistribution1.getTopHosts().get(0), blocksDistribution2.getTopHosts()
            .get(0));
      }
    }
  }
}
/**
 * Helper function to compute the HDFS block distribution of a given reference
 * file. For a reference file we don't compute the exact value; we use an
 * estimate instead, assuming the bottom part takes the first half of the
 * referenced file and the top part the second half. This is only an estimate
 * (the region midkey is not the HFile midkey, and the number and size of keys
 * vary); it can be improved later if the estimate proves insufficient.
 *
 * @param fs The FileSystem
 * @param reference The reference
 * @param referencePath path to the reference file (was mis-documented as a
 *        second {@code @param reference})
 * @return HDFS blocks distribution, or null when {@code referencePath} is null
 */
static private HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
    FileSystem fs, Reference reference, Path referencePath) throws IOException {
  if ( referencePath == null) {
    return null;
  }
  FileStatus status = fs.getFileStatus(referencePath);
  long start = 0;
  long length = 0;
  if (Reference.isTopFileRegion(reference.getFileRegion())) {
    // Top half: midpoint to end; lengths complement the bottom half exactly.
    start = status.getLen()/2;
    length = status.getLen() - status.getLen()/2;
  } else {
    // Bottom half: start to midpoint.
    start = 0;
    length = status.getLen()/2;
  }
  return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
}
/**
 * Helper to compute the HDFS block distribution of a region on demand. The
 * table directory is derived from the configured root directory, then every
 * store file of every column family contributes its block locations.
 *
 * @param conf configuration
 * @param tableDescriptor HTableDescriptor of the table
 * @param regionInfo the region to compute the distribution for
 * @return The HDFS blocks distribution for the given region.
 * @throws IOException on filesystem errors
 */
public static HDFSBlocksDistribution computeHDFSBlocksDistribution(final Configuration conf,
    final HTableDescriptor tableDescriptor, final HRegionInfo regionInfo) throws IOException {
  Path tablePath = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableDescriptor.getTableName());
  FileSystem fs = tablePath.getFileSystem(conf);
  HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tablePath, regionInfo);
  HDFSBlocksDistribution distribution = new HDFSBlocksDistribution();
  for (HColumnDescriptor family : tableDescriptor.getFamilies()) {
    Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family.getNameAsString());
    if (storeFiles != null) {
      // Sum each store file's distribution into the region-wide result.
      for (StoreFileInfo info : storeFiles) {
        distribution.add(info.computeHDFSBlocksDistribution(fs));
      }
    }
  }
  return distribution;
}