/** * Split a region on the region server. * * @param controller the RPC controller * @param request the request * @throws ServiceException */ @Override @QosPriority(priority=HConstants.ADMIN_QOS) public SplitRegionResponse splitRegion(final RpcController controller, final SplitRegionRequest request) throws ServiceException { try { checkOpen(); requestCount.increment(); Region region = getRegion(request.getRegion()); region.startRegionOperation(Operation.SPLIT_REGION); if (region.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { throw new IOException("Can't split replicas directly. " + "Replicas are auto-split when their primary is split."); } LOG.info("Splitting " + region.getRegionInfo().getRegionNameAsString()); long startTime = EnvironmentEdgeManager.currentTime(); FlushResult flushResult = region.flush(true); if (flushResult.isFlushSucceeded()) { long endTime = EnvironmentEdgeManager.currentTime(); regionServer.metricsRegionServer.updateFlushTime(endTime - startTime); } byte[] splitPoint = null; if (request.hasSplitPoint()) { splitPoint = request.getSplitPoint().toByteArray(); } ((HRegion)region).forceSplit(splitPoint); regionServer.compactSplitThread.requestSplit(region, ((HRegion)region).checkSplit(), RpcServer.getRequestUser()); return SplitRegionResponse.newBuilder().build(); } catch (DroppedSnapshotException ex) { regionServer.abort("Replay of WAL required. Forcing server shutdown", ex); throw new ServiceException(ex); } catch (IOException ie) { throw new ServiceException(ie); } }
/** * Flush a region on the region server. * * @param controller the RPC controller * @param request the request * @throws ServiceException */ @Override @QosPriority(priority=HConstants.HIGH_QOS) public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException { try { checkOpen(); requestCount.increment(); HRegion region = getRegion(request.getRegion()); LOG.info("Flushing " + region.getRegionNameAsString()); boolean shouldFlush = true; if (request.hasIfOlderThanTs()) { shouldFlush = region.getLastFlushTime() < request.getIfOlderThanTs(); } FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder(); if (shouldFlush) { boolean result = region.flushcache().isCompactionNeeded(); if (result) { this.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush"); } builder.setFlushed(result); } builder.setLastFlushTime(region.getLastFlushTime()); return builder.build(); } catch (DroppedSnapshotException ex) { // Cache flush can fail in a few places. If it fails in a critical // section, we get a DroppedSnapshotException and a replay of hlog // is required. Currently the only way to do this is a restart of // the server. abort("Replay of HLog required. Forcing server shutdown", ex); throw new ServiceException(ex); } catch (IOException ie) { throw new ServiceException(ie); } }
/** * Flush a region on the region server. * * @param controller the RPC controller * @param request the request * @throws ServiceException */ @Override @QosPriority(priority=HConstants.HIGH_QOS) public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException { try { checkOpen(); requestCount.increment(); HRegion region = getRegion(request.getRegion()); LOG.info("Flushing " + region.getRegionNameAsString()); boolean shouldFlush = true; if (request.hasIfOlderThanTs()) { shouldFlush = region.getLastFlushTime() < request.getIfOlderThanTs(); } FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder(); if (shouldFlush) { boolean result = region.flushcache().isCompactionNeeded(); if (result) { regionServer.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush"); } builder.setFlushed(result); } builder.setLastFlushTime(region.getLastFlushTime()); return builder.build(); } catch (DroppedSnapshotException ex) { // Cache flush can fail in a few places. If it fails in a critical // section, we get a DroppedSnapshotException and a replay of hlog // is required. Currently the only way to do this is a restart of // the server. regionServer.abort("Replay of HLog required. Forcing server shutdown", ex); throw new ServiceException(ex); } catch (IOException ie) { throw new ServiceException(ie); } }
/** * Flush a region on the region server. * * @param controller the RPC controller * @param request the request * @throws ServiceException */ @Override @QosPriority(priority=HConstants.ADMIN_QOS) public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException { try { checkOpen(); requestCount.increment(); Region region = getRegion(request.getRegion()); LOG.info("Flushing " + region.getRegionInfo().getRegionNameAsString()); boolean shouldFlush = true; if (request.hasIfOlderThanTs()) { shouldFlush = region.getEarliestFlushTimeForAllStores() < request.getIfOlderThanTs(); } FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder(); if (shouldFlush) { boolean writeFlushWalMarker = request.hasWriteFlushWalMarker() ? request.getWriteFlushWalMarker() : false; long startTime = EnvironmentEdgeManager.currentTime(); // Go behind the curtain so we can manage writing of the flush WAL marker HRegion.FlushResultImpl flushResult = (HRegion.FlushResultImpl) ((HRegion)region).flushcache(true, writeFlushWalMarker); if (flushResult.isFlushSucceeded()) { long endTime = EnvironmentEdgeManager.currentTime(); regionServer.metricsRegionServer.updateFlushTime(endTime - startTime); } boolean compactionNeeded = flushResult.isCompactionNeeded(); if (compactionNeeded) { regionServer.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush"); } builder.setFlushed(flushResult.isFlushSucceeded()); builder.setWroteFlushWalMarker(flushResult.wroteFlushWalMarker); } builder.setLastFlushTime(region.getEarliestFlushTimeForAllStores()); return builder.build(); } catch (DroppedSnapshotException ex) { // Cache flush can fail in a few places. If it fails in a critical // section, we get a DroppedSnapshotException and a replay of wal // is required. Currently the only way to do this is a restart of // the server. regionServer.abort("Replay of WAL required. Forcing server shutdown", ex); throw new ServiceException(ex); } catch (IOException ie) { throw new ServiceException(ie); } }
@Test public void test() throws IOException, InterruptedException { final HRegionServer rs = testUtil.getRSForFirstRegionInTable(tableName); final HRegion region = (HRegion) rs.getOnlineRegions(tableName).get(0); HRegion spiedRegion = spy(region); final MutableBoolean flushed = new MutableBoolean(false); final MutableBoolean reported = new MutableBoolean(false); doAnswer(new Answer<FlushResult>() { @Override public FlushResult answer(InvocationOnMock invocation) throws Throwable { synchronized (flushed) { flushed.setValue(true); flushed.notifyAll(); } synchronized (reported) { while (!reported.booleanValue()) { reported.wait(); } } rs.getWAL(region.getRegionInfo()).abortCacheFlush( region.getRegionInfo().getEncodedNameAsBytes()); throw new DroppedSnapshotException("testcase"); } }).when(spiedRegion).internalFlushCacheAndCommit(Matchers.<WAL> any(), Matchers.<MonitoredTask> any(), Matchers.<PrepareFlushResult> any(), Matchers.<Collection<Store>> any()); // Find region key; don't pick up key for hbase:meta by mistake. String key = null; for (Map.Entry<String, Region> entry: rs.onlineRegions.entrySet()) { if (entry.getValue().getRegionInfo().getTable().equals(this.tableName)) { key = entry.getKey(); break; } } rs.onlineRegions.put(key, spiedRegion); Connection conn = testUtil.getConnection(); try (Table table = conn.getTable(tableName)) { table.put(new Put(Bytes.toBytes("row0")).addColumn(family, qualifier, Bytes.toBytes("val0"))); } long oldestSeqIdOfStore = region.getOldestSeqIdOfStore(family); LOG.info("CHANGE OLDEST " + oldestSeqIdOfStore); assertTrue(oldestSeqIdOfStore > HConstants.NO_SEQNUM); rs.cacheFlusher.requestFlush(spiedRegion, false); synchronized (flushed) { while (!flushed.booleanValue()) { flushed.wait(); } } try (Table table = conn.getTable(tableName)) { table.put(new Put(Bytes.toBytes("row1")).addColumn(family, qualifier, Bytes.toBytes("val1"))); } long now = EnvironmentEdgeManager.currentTime(); rs.tryRegionServerReport(now - 500, now); synchronized (reported) { reported.setValue(true); reported.notifyAll(); } while (testUtil.getRSForFirstRegionInTable(tableName) == rs) { Thread.sleep(100); } try (Table table = conn.getTable(tableName)) { Result result = table.get(new Get(Bytes.toBytes("row0"))); assertArrayEquals(Bytes.toBytes("val0"), result.getValue(family, qualifier)); } }
/** * Test we do not lose data if we fail a flush and then close. * Part of HBase-10466. Tests the following from the issue description: * "Bug 1: Wrong calculation of HRegion.memstoreSize: When a flush fails, data to be flushed is * kept in each MemStore's snapshot and wait for next flush attempt to continue on it. But when * the next flush succeeds, the counter of total memstore size in HRegion is always deduced by * the sum of current memstore sizes instead of snapshots left from previous failed flush. This * calculation is problematic that almost every time there is failed flush, HRegion.memstoreSize * gets reduced by a wrong value. If region flush could not proceed for a couple cycles, the size * in current memstore could be much larger than the snapshot. It's likely to drift memstoreSize * much smaller than expected. In extreme case, if the error accumulates to even bigger than * HRegion's memstore size limit, any further flush is skipped because flush does not do anything * if memstoreSize is not larger than 0." * @throws Exception */ @Test (timeout=60000) public void testFlushSizeAccounting() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, name.getMethodName(), conf, COLUMN_FAMILY_BYTES); long size = region.getMemstoreSize().get(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); region.put(p1); final long sizeOfOnePut = region.getMemstoreSize().get(); // Fail a flush which means the current memstore will hang out as memstore 'snapshot'. try { LOG.info("Flushing"); region.flushcache(); Assert.fail("Didn't bubble up IOE!"); } catch (DroppedSnapshotException dse) { // What we are expecting } // Make it so all writes succeed from here on out ffs.fault.set(false); // Check sizes. Should still be the one entry. Assert.assertEquals(sizeOfOnePut, region.getMemstoreSize().get()); // Now add two entries so that on this next flush that fails, we can see if we // subtract the right amount, the snapshot size only. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); Assert.assertEquals(sizeOfOnePut * 3, region.getMemstoreSize().get()); // Do a successful flush. It will clear the snapshot only. Thats how flushes work. // If already a snapshot, we clear it else we move the memstore to be snapshot and flush // it region.flushcache(); // Make sure our memory accounting is right. Assert.assertEquals(sizeOfOnePut * 2, region.getMemstoreSize().get()); } finally { HRegion.closeHRegion(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }
@Test (timeout=60000) public void testCloseWithFailingFlush() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, this.name.getMethodName(), new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, name.getMethodName(), conf, COLUMN_FAMILY_BYTES); long size = region.getMemstoreSize().get(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); region.put(p1); // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. Store store = region.getStore(COLUMN_FAMILY_BYTES); StoreFlushContext storeFlushCtx = store.createFlushContext(12345); storeFlushCtx.prepare(); // Now add two entries to the foreground memstore. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); // Now try close on top of a failing flush. region.close(); fail(); } catch (DroppedSnapshotException dse) { // Expected LOG.info("Expected DroppedSnapshotException"); } finally { // Make it so all writes succeed from here on out so can close clean ffs.fault.set(false); HRegion.closeHRegion(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }
/** * Flush a region on the region server. * * @param controller the RPC controller * @param request the request * @throws ServiceException */ @Override @QosPriority(priority=HConstants.ADMIN_QOS) public FlushRegionResponse flushRegion(final RpcController controller, final FlushRegionRequest request) throws ServiceException { try { checkOpen(); requestCount.increment(); HRegion region = getRegion(request.getRegion()); LOG.info("Flushing " + region.getRegionInfo().getRegionNameAsString()); boolean shouldFlush = true; if (request.hasIfOlderThanTs()) { shouldFlush = region.getEarliestFlushTimeForAllStores() < request.getIfOlderThanTs(); } FlushRegionResponse.Builder builder = FlushRegionResponse.newBuilder(); if (shouldFlush) { boolean writeFlushWalMarker = request.hasWriteFlushWalMarker() ? request.getWriteFlushWalMarker() : false; // Go behind the curtain so we can manage writing of the flush WAL marker HRegion.FlushResultImpl flushResult = region.flushcache(true, writeFlushWalMarker, FlushLifeCycleTracker.DUMMY); boolean compactionNeeded = flushResult.isCompactionNeeded(); if (compactionNeeded) { regionServer.compactSplitThread.requestSystemCompaction(region, "Compaction through user triggered flush"); } builder.setFlushed(flushResult.isFlushSucceeded()); builder.setWroteFlushWalMarker(flushResult.wroteFlushWalMarker); } builder.setLastFlushTime(region.getEarliestFlushTimeForAllStores()); return builder.build(); } catch (DroppedSnapshotException ex) { // Cache flush can fail in a few places. If it fails in a critical // section, we get a DroppedSnapshotException and a replay of wal // is required. Currently the only way to do this is a restart of // the server. regionServer.abort("Replay of WAL required. Forcing server shutdown", ex); throw new ServiceException(ex); } catch (IOException ie) { throw new ServiceException(ie); } }
@Test public void test() throws IOException, InterruptedException { final HRegionServer rs = testUtil.getRSForFirstRegionInTable(tableName); final HRegion region = (HRegion) rs.getRegions(tableName).get(0); HRegion spiedRegion = spy(region); final MutableBoolean flushed = new MutableBoolean(false); final MutableBoolean reported = new MutableBoolean(false); doAnswer(new Answer<FlushResult>() { @Override public FlushResult answer(InvocationOnMock invocation) throws Throwable { synchronized (flushed) { flushed.setValue(true); flushed.notifyAll(); } synchronized (reported) { while (!reported.booleanValue()) { reported.wait(); } } rs.getWAL(region.getRegionInfo()).abortCacheFlush( region.getRegionInfo().getEncodedNameAsBytes()); throw new DroppedSnapshotException("testcase"); } }).when(spiedRegion).internalFlushCacheAndCommit(Matchers.<WAL> any(), Matchers.<MonitoredTask> any(), Matchers.<PrepareFlushResult> any(), Matchers.<Collection<HStore>> any()); // Find region key; don't pick up key for hbase:meta by mistake. String key = null; for (Map.Entry<String, HRegion> entry: rs.onlineRegions.entrySet()) { if (entry.getValue().getRegionInfo().getTable().equals(this.tableName)) { key = entry.getKey(); break; } } rs.onlineRegions.put(key, spiedRegion); Connection conn = testUtil.getConnection(); try (Table table = conn.getTable(tableName)) { table.put(new Put(Bytes.toBytes("row0")) .addColumn(family, qualifier, Bytes.toBytes("val0"))); } long oldestSeqIdOfStore = region.getOldestSeqIdOfStore(family); LOG.info("CHANGE OLDEST " + oldestSeqIdOfStore); assertTrue(oldestSeqIdOfStore > HConstants.NO_SEQNUM); rs.cacheFlusher.requestFlush(spiedRegion, false, FlushLifeCycleTracker.DUMMY); synchronized (flushed) { while (!flushed.booleanValue()) { flushed.wait(); } } try (Table table = conn.getTable(tableName)) { table.put(new Put(Bytes.toBytes("row1")) .addColumn(family, qualifier, Bytes.toBytes("val1"))); } long now = EnvironmentEdgeManager.currentTime(); rs.tryRegionServerReport(now - 500, now); synchronized (reported) { reported.setValue(true); reported.notifyAll(); } while (testUtil.getRSForFirstRegionInTable(tableName) == rs) { Thread.sleep(100); } try (Table table = conn.getTable(tableName)) { Result result = table.get(new Get(Bytes.toBytes("row0"))); assertArrayEquals(Bytes.toBytes("val0"), result.getValue(family, qualifier)); } }
/** * Test we do not lose data if we fail a flush and then close. * Part of HBase-10466. Tests the following from the issue description: * "Bug 1: Wrong calculation of HRegion.memstoreSize: When a flush fails, data to be flushed is * kept in each MemStore's snapshot and wait for next flush attempt to continue on it. But when * the next flush succeeds, the counter of total memstore size in HRegion is always deduced by * the sum of current memstore sizes instead of snapshots left from previous failed flush. This * calculation is problematic that almost every time there is failed flush, HRegion.memstoreSize * gets reduced by a wrong value. If region flush could not proceed for a couple cycles, the size * in current memstore could be much larger than the snapshot. It's likely to drift memstoreSize * much smaller than expected. In extreme case, if the error accumulates to even bigger than * HRegion's memstore size limit, any further flush is skipped because flush does not do anything * if memstoreSize is not larger than 0." * @throws Exception */ @Test public void testFlushSizeAccounting() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); final WAL wal = createWALCompatibleWithFaultyFileSystem(method, conf, tableName); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, method, new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); long size = region.getMemStoreSize(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[]) null)); region.put(p1); final long sizeOfOnePut = region.getMemStoreSize(); // Fail a flush which means the current memstore will hang out as memstore 'snapshot'. try { LOG.info("Flushing"); region.flush(true); Assert.fail("Didn't bubble up IOE!"); } catch (DroppedSnapshotException dse) { // What we are expecting region.closing.set(false); // this is needed for the rest of the test to work } // Make it so all writes succeed from here on out ffs.fault.set(false); // Check sizes. Should still be the one entry. Assert.assertEquals(sizeOfOnePut, region.getMemStoreSize()); // Now add two entries so that on this next flush that fails, we can see if we // subtract the right amount, the snapshot size only. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); long expectedSize = sizeOfOnePut * 3; Assert.assertEquals(expectedSize, region.getMemStoreSize()); // Do a successful flush. It will clear the snapshot only. Thats how flushes work. // If already a snapshot, we clear it else we move the memstore to be snapshot and flush // it region.flush(true); // Make sure our memory accounting is right. Assert.assertEquals(sizeOfOnePut * 2, region.getMemStoreSize()); } finally { HBaseTestingUtility.closeRegionAndWAL(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }
@Test public void testCloseWithFailingFlush() throws Exception { final Configuration conf = HBaseConfiguration.create(CONF); final WAL wal = createWALCompatibleWithFaultyFileSystem(method, conf, tableName); // Only retry once. conf.setInt("hbase.hstore.flush.retries.number", 1); final User user = User.createUserForTesting(conf, this.method, new String[]{"foo"}); // Inject our faulty LocalFileSystem conf.setClass("fs.file.impl", FaultyFileSystem.class, FileSystem.class); user.runAs(new PrivilegedExceptionAction<Object>() { @Override public Object run() throws Exception { // Make sure it worked (above is sensitive to caching details in hadoop core) FileSystem fs = FileSystem.get(conf); Assert.assertEquals(FaultyFileSystem.class, fs.getClass()); FaultyFileSystem ffs = (FaultyFileSystem)fs; HRegion region = null; try { // Initialize region region = initHRegion(tableName, null, null, false, Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES); long size = region.getMemStoreSize(); Assert.assertEquals(0, size); // Put one item into memstore. Measure the size of one item in memstore. Put p1 = new Put(row); p1.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual1, 1, (byte[])null)); region.put(p1); // Manufacture an outstanding snapshot -- fake a failed flush by doing prepare step only. HStore store = region.getStore(COLUMN_FAMILY_BYTES); StoreFlushContext storeFlushCtx = store.createFlushContext(12345, FlushLifeCycleTracker.DUMMY); storeFlushCtx.prepare(); // Now add two entries to the foreground memstore. Put p2 = new Put(row); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual2, 2, (byte[])null)); p2.add(new KeyValue(row, COLUMN_FAMILY_BYTES, qual3, 3, (byte[])null)); region.put(p2); // Now try close on top of a failing flush. region.close(); fail(); } catch (DroppedSnapshotException dse) { // Expected LOG.info("Expected DroppedSnapshotException"); } finally { // Make it so all writes succeed from here on out so can close clean ffs.fault.set(false); HBaseTestingUtility.closeRegionAndWAL(region); } return null; } }); FileSystem.closeAllForUGI(user.getUGI()); }