/**
 * Counts an exception on the metrics source and, for a handful of commonly seen exception
 * types, also invokes the matching per-type method on the source.
 *
 * @param throwable the exception to record; when {@code null} only {@code source.exception()}
 *                  is invoked
 */
public void exception(Throwable throwable) {
  source.exception();
  if (throwable == null) {
    return;
  }

  // Rules for the checks below:
  //  - try to keep the most common types first;
  //  - a child type must be tested before the parent type it extends.
  // If this list gets much larger we might have to go to a hashmap.
  if (throwable instanceof OutOfOrderScannerNextException) {
    source.outOfOrderException();
    return;
  }
  if (throwable instanceof RegionTooBusyException) {
    source.tooBusyException();
    return;
  }
  if (throwable instanceof UnknownScannerException) {
    source.unknownScannerException();
    return;
  }
  if (throwable instanceof RegionMovedException) {
    source.movedRegionException();
    return;
  }
  if (throwable instanceof NotServingRegionException) {
    source.notServingRegionException();
    return;
  }
  if (throwable instanceof FailedSanityCheckException) {
    source.failedSanityException();
    return;
  }
  if (throwable instanceof MultiActionResultTooLarge) {
    source.multiActionTooLargeException();
  }
}
/**
 * Increment the count for a specific exception type. This is called for each exception type
 * that is returned to the thrift handler.
 * <p>
 * The overall exception counter is always incremented. The argument is then passed through
 * {@code unwrap(...)} and, if the result is one of the commonly seen types below, the matching
 * per-type method on the metrics source is invoked as well.
 *
 * @param rawThrowable type of exception
 */
public void exception(Throwable rawThrowable) {
  source.exception();

  final Throwable unwrapped = unwrap(rawThrowable);
  if (unwrapped == null) {
    return;
  }

  // Rules for the checks below:
  //  - try to keep the most common types first;
  //  - a child type must be tested before the parent type it extends.
  // If this list gets much larger we might have to go to a hashmap.
  if (unwrapped instanceof OutOfOrderScannerNextException) {
    source.outOfOrderException();
    return;
  }
  if (unwrapped instanceof RegionTooBusyException) {
    source.tooBusyException();
    return;
  }
  if (unwrapped instanceof UnknownScannerException) {
    source.unknownScannerException();
    return;
  }
  if (unwrapped instanceof ScannerResetException) {
    source.scannerResetException();
    return;
  }
  if (unwrapped instanceof RegionMovedException) {
    source.movedRegionException();
    return;
  }
  if (unwrapped instanceof NotServingRegionException) {
    source.notServingRegionException();
    return;
  }
  if (unwrapped instanceof FailedSanityCheckException) {
    source.failedSanityException();
    return;
  }
  if (unwrapped instanceof MultiActionResultTooLarge) {
    source.multiActionTooLargeException();
    return;
  }
  if (unwrapped instanceof CallQueueTooBigException) {
    source.callQueueTooBigException();
  }
}
/**
 * Fails the Get on purpose when it carries the {@code SHOULD_ERROR_ATTRIBUTE} attribute.
 * <p>
 * The attribute value is decoded as the name of an {@code ErrorType} constant and the
 * exception associated with that constant is thrown. A Get without the attribute passes
 * through untouched.
 *
 * @param e       observer context; its environment supplies the server name used for
 *                {@code RegionMovedException}
 * @param get     the Get being processed
 * @param results not used by this hook
 * @throws IOException the exception selected by the attribute value
 */
@Override
public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> e, Get get,
    List<Cell> results) throws IOException {
  final byte[] requestedError = get.getAttribute(SHOULD_ERROR_ATTRIBUTE);
  if (requestedError == null) {
    // No error requested for this Get.
    return;
  }

  final String errorName = Bytes.toString(requestedError);
  switch (ErrorType.valueOf(errorName)) {
    case CALL_QUEUE_TOO_BIG:
      throw new CallQueueTooBigException("Failing for test");
    case MULTI_ACTION_RESULT_TOO_LARGE:
      throw new MultiActionResultTooLarge("Failing for test");
    case FAILED_SANITY_CHECK:
      throw new FailedSanityCheckException("Failing for test");
    case NOT_SERVING_REGION:
      throw new NotServingRegionException("Failing for test");
    case REGION_MOVED:
      throw new RegionMovedException(e.getEnvironment().getServerName(), 1);
    case SCANNER_RESET:
      throw new ScannerResetException("Failing for test");
    case UNKNOWN_SCANNER:
      throw new UnknownScannerException("Failing for test");
    case REGION_TOO_BUSY:
      throw new RegionTooBusyException("Failing for test");
    case OUT_OF_ORDER_SCANNER_NEXT:
      throw new OutOfOrderScannerNextException("Failing for test");
    default:
      // Any other ErrorType constant falls back to a plain DoNotRetryIOException.
      throw new DoNotRetryIOException("Failing for test");
  }
}
private void checkScanNextCallSeq(ScanRequest request, RegionScannerHolder rsh) throws OutOfOrderScannerNextException { // if nextCallSeq does not match throw Exception straight away. This needs to be // performed even before checking of Lease. // See HBASE-5974 if (request.hasNextCallSeq()) { long callSeq = request.getNextCallSeq(); if (!rsh.incNextCallSeq(callSeq)) { throw new OutOfOrderScannerNextException("Expected nextCallSeq: " + rsh.getNextCallSeq() + " But the nextCallSeq got from client: " + request.getNextCallSeq() + "; request=" + TextFormat.shortDebugString(request)); } } }
/**
 * Counts an exception on the metrics source and, for a handful of commonly seen exception
 * types, also invokes the matching per-type method on the source.
 *
 * @param throwable the exception to record; when {@code null} only {@code source.exception()}
 *                  is invoked
 */
public void exception(Throwable throwable) {
  source.exception();
  if (throwable == null) {
    return;
  }

  // Rules for the checks below:
  //  - try to keep the most common types first;
  //  - a child type must be tested before the parent type it extends.
  // If this list gets much larger we might have to go to a hashmap.
  if (throwable instanceof OutOfOrderScannerNextException) {
    source.outOfOrderException();
    return;
  }
  if (throwable instanceof RegionTooBusyException) {
    source.tooBusyException();
    return;
  }
  if (throwable instanceof UnknownScannerException) {
    source.unknownScannerException();
    return;
  }
  if (throwable instanceof ScannerResetException) {
    source.scannerResetException();
    return;
  }
  if (throwable instanceof RegionMovedException) {
    source.movedRegionException();
    return;
  }
  if (throwable instanceof NotServingRegionException) {
    source.notServingRegionException();
    return;
  }
  if (throwable instanceof FailedSanityCheckException) {
    source.failedSanityException();
    return;
  }
  if (throwable instanceof MultiActionResultTooLarge) {
    source.multiActionTooLargeException();
    return;
  }
  if (throwable instanceof CallQueueTooBigException) {
    source.callQueueTooBigException();
  }
}
/**
 * Calls each actively used method on MetricsHBaseServer and checks the counter it feeds.
 */
@Test
public void testSourceMethods() {
  MetricsHBaseServer metrics =
      new MetricsHBaseServer("HMaster", new MetricsHBaseServerWrapperStub());
  MetricsHBaseServerSource metricsSource = metrics.getMetricsSource();

  // Authentication counters.
  for (int remaining = 12; remaining > 0; remaining--) {
    metrics.authenticationFailure();
  }
  for (int remaining = 13; remaining > 0; remaining--) {
    metrics.authenticationSuccess();
  }
  HELPER.assertCounter("authenticationFailures", 12, metricsSource);
  HELPER.assertCounter("authenticationSuccesses", 13, metricsSource);

  // Authorization counters.
  for (int remaining = 14; remaining > 0; remaining--) {
    metrics.authorizationSuccess();
  }
  for (int remaining = 15; remaining > 0; remaining--) {
    metrics.authorizationFailure();
  }
  HELPER.assertCounter("authorizationSuccesses", 14, metricsSource);
  HELPER.assertCounter("authorizationFailures", 15, metricsSource);

  // Call timing: one sample each.
  metrics.dequeuedCall(100);
  metrics.processedCall(101);
  metrics.totalCall(102);
  HELPER.assertCounter("queueCallTime_NumOps", 1, metricsSource);
  HELPER.assertCounter("processCallTime_NumOps", 1, metricsSource);
  HELPER.assertCounter("totalCallTime_NumOps", 1, metricsSource);

  // Byte counters: 3 x 103 sent, 2 x 104 received.
  for (int remaining = 3; remaining > 0; remaining--) {
    metrics.sentBytes(103);
  }
  for (int remaining = 2; remaining > 0; remaining--) {
    metrics.receivedBytes(104);
  }
  HELPER.assertCounter("sentBytes", 309, metricsSource);
  HELPER.assertCounter("receivedBytes", 208, metricsSource);

  // Request/response sizes: one sample each.
  metrics.receivedRequest(105);
  metrics.sentResponse(106);
  HELPER.assertCounter("requestSize_NumOps", 1, metricsSource);
  HELPER.assertCounter("responseSize_NumOps", 1, metricsSource);

  // A null exception is still counted in the overall total.
  metrics.exception(null);
  HELPER.assertCounter("exceptions", 1, metricsSource);

  // Four typed exceptions: each per-type counter reads 1, the total reads 5.
  metrics.exception(
      new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100));
  metrics.exception(new RegionTooBusyException());
  metrics.exception(new OutOfOrderScannerNextException());
  metrics.exception(new NotServingRegionException());
  HELPER.assertCounter("exceptions.RegionMovedException", 1, metricsSource);
  HELPER.assertCounter("exceptions.RegionTooBusyException", 1, metricsSource);
  HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, metricsSource);
  HELPER.assertCounter("exceptions.NotServingRegionException", 1, metricsSource);
  HELPER.assertCounter("exceptions", 5, metricsSource);
}
/**
 * Calls each actively used method on MetricsHBaseServer and checks the counter it feeds.
 */
@Test
public void testSourceMethods() {
  MetricsHBaseServer metrics =
      new MetricsHBaseServer("HMaster", new MetricsHBaseServerWrapperStub());
  MetricsHBaseServerSource metricsSource = metrics.getMetricsSource();

  // Authentication counters.
  for (int remaining = 12; remaining > 0; remaining--) {
    metrics.authenticationFailure();
  }
  for (int remaining = 13; remaining > 0; remaining--) {
    metrics.authenticationSuccess();
  }
  HELPER.assertCounter("authenticationFailures", 12, metricsSource);
  HELPER.assertCounter("authenticationSuccesses", 13, metricsSource);

  // Authorization counters.
  for (int remaining = 14; remaining > 0; remaining--) {
    metrics.authorizationSuccess();
  }
  for (int remaining = 15; remaining > 0; remaining--) {
    metrics.authorizationFailure();
  }
  HELPER.assertCounter("authorizationSuccesses", 14, metricsSource);
  HELPER.assertCounter("authorizationFailures", 15, metricsSource);

  // Call timing: one sample each.
  metrics.dequeuedCall(100);
  metrics.processedCall(101);
  metrics.totalCall(102);
  HELPER.assertCounter("queueCallTime_NumOps", 1, metricsSource);
  HELPER.assertCounter("processCallTime_NumOps", 1, metricsSource);
  HELPER.assertCounter("totalCallTime_NumOps", 1, metricsSource);

  // Byte counters: 3 x 103 sent, 2 x 104 received.
  for (int remaining = 3; remaining > 0; remaining--) {
    metrics.sentBytes(103);
  }
  for (int remaining = 2; remaining > 0; remaining--) {
    metrics.receivedBytes(104);
  }
  HELPER.assertCounter("sentBytes", 309, metricsSource);
  HELPER.assertCounter("receivedBytes", 208, metricsSource);

  // Request/response sizes: one sample each.
  metrics.receivedRequest(105);
  metrics.sentResponse(106);
  HELPER.assertCounter("requestSize_NumOps", 1, metricsSource);
  HELPER.assertCounter("responseSize_NumOps", 1, metricsSource);

  // A null exception is still counted in the overall total.
  metrics.exception(null);
  HELPER.assertCounter("exceptions", 1, metricsSource);

  // Four typed exceptions: each per-type counter reads 1, the total reads 5.
  metrics.exception(
      new RegionMovedException(ServerName.parseServerName("localhost:60020"), 100));
  metrics.exception(new RegionTooBusyException("Some region"));
  metrics.exception(new OutOfOrderScannerNextException());
  metrics.exception(new NotServingRegionException());
  HELPER.assertCounter("exceptions.RegionMovedException", 1, metricsSource);
  HELPER.assertCounter("exceptions.RegionTooBusyException", 1, metricsSource);
  HELPER.assertCounter("exceptions.OutOfOrderScannerNextException", 1, metricsSource);
  HELPER.assertCounter("exceptions.NotServingRegionException", 1, metricsSource);
  HELPER.assertCounter("exceptions", 5, metricsSource);
}
private void handleScanError(DoNotRetryIOException e, MutableBoolean retryAfterOutOfOrderException, int retriesLeft) throws DoNotRetryIOException { // An exception was thrown which makes any partial results that we were collecting // invalid. The scanner will need to be reset to the beginning of a row. scanResultCache.clear(); // Unfortunately, DNRIOE is used in two different semantics. // (1) The first is to close the client scanner and bubble up the exception all the way // to the application. This is preferred when the exception is really un-recoverable // (like CorruptHFileException, etc). Plain DoNotRetryIOException also falls into this // bucket usually. // (2) Second semantics is to close the current region scanner only, but continue the // client scanner by overriding the exception. This is usually UnknownScannerException, // OutOfOrderScannerNextException, etc where the region scanner has to be closed, but the // application-level ClientScanner has to continue without bubbling up the exception to // the client. See RSRpcServices to see how it throws DNRIOE's. // See also: HBASE-16604, HBASE-17187 // If exception is any but the list below throw it back to the client; else setup // the scanner and retry. Throwable cause = e.getCause(); if ((cause != null && cause instanceof NotServingRegionException) || (cause != null && cause instanceof RegionServerStoppedException) || e instanceof OutOfOrderScannerNextException || e instanceof UnknownScannerException || e instanceof ScannerResetException) { // Pass. It is easier writing the if loop test as list of what is allowed rather than // as a list of what is not allowed... so if in here, it means we do not throw. if (retriesLeft <= 0) { throw e; // no more retries } } else { throw e; } // Else, its signal from depths of ScannerCallable that we need to reset the scanner. if (this.lastResult != null) { // The region has moved. We need to open a brand new scanner at the new location. 
// Reset the startRow to the row we've seen last so that the new scanner starts at // the correct row. Otherwise we may see previously returned rows again. // If the lastRow is not partial, then we should start from the next row. As now we can // exclude the start row, the logic here is the same for both normal scan and reversed scan. // If lastResult is partial then include it, otherwise exclude it. scan.withStartRow(lastResult.getRow(), lastResult.mayHaveMoreCellsInRow()); } if (e instanceof OutOfOrderScannerNextException) { if (retryAfterOutOfOrderException.isTrue()) { retryAfterOutOfOrderException.setValue(false); } else { // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE? throw new DoNotRetryIOException( "Failed after retry of OutOfOrderScannerNextException: was there a rpc timeout?", e); } } // Clear region. this.currentRegion = null; // Set this to zero so we don't try and do an rpc and close on remote server when // the exception we got was UnknownScanner or the Server is going down. callable = null; }
private void onError(Throwable error) { error = translateException(error); if (tries > startLogErrorsCnt) { LOG.warn("Call to " + loc.getServerName() + " for scanner id = " + scannerId + " for " + loc.getRegion().getEncodedName() + " of " + loc.getRegion().getTable() + " failed, , tries = " + tries + ", maxAttempts = " + maxAttempts + ", timeout = " + TimeUnit.NANOSECONDS.toMillis(scanTimeoutNs) + " ms, time elapsed = " + elapsedMs() + " ms", error); } boolean scannerClosed = error instanceof UnknownScannerException || error instanceof NotServingRegionException || error instanceof RegionServerStoppedException; RetriesExhaustedException.ThrowableWithExtraContext qt = new RetriesExhaustedException.ThrowableWithExtraContext(error, EnvironmentEdgeManager.currentTime(), ""); exceptions.add(qt); if (tries >= maxAttempts) { completeExceptionally(!scannerClosed); return; } long delayNs; if (scanTimeoutNs > 0) { long maxDelayNs = remainingTimeNs() - SLEEP_DELTA_NS; if (maxDelayNs <= 0) { completeExceptionally(!scannerClosed); return; } delayNs = Math.min(maxDelayNs, getPauseTime(pauseNs, tries - 1)); } else { delayNs = getPauseTime(pauseNs, tries - 1); } if (scannerClosed) { completeWhenError(false); return; } if (error instanceof OutOfOrderScannerNextException || error instanceof ScannerResetException) { completeWhenError(true); return; } if (error instanceof DoNotRetryIOException) { completeExceptionally(true); return; } tries++; retryTimer.newTimeout(t -> call(), delayNs, TimeUnit.NANOSECONDS); }
@Override public void onFailure(IOException e) { if (e instanceof DoNotRetryIOException) { // DNRIOEs are thrown to make us break out of retries. Some types of DNRIOEs want us // to reset the scanner and come back in again. if (e instanceof UnknownScannerException) { long timeout = lastNext + scannerTimeout; // If we are over the timeout, throw this exception to the client wrapped in // a ScannerTimeoutException. Else, it's because the region moved and we used the old // id against the new region server; reset the scanner. if (timeout < System.currentTimeMillis()) { long elapsed = System.currentTimeMillis() - lastNext; ScannerTimeoutException ex = new ScannerTimeoutException( elapsed + "ms passed since the last invocation, " + "timeout is currently set to " + scannerTimeout); ex.initCause(e); handler.onFailure(e); } } else { // If exception is any but the list below throw it back to the client; else setup // the scanner and retry. Throwable cause = e.getCause(); if ((cause != null && cause instanceof NotServingRegionException) || (cause != null && cause instanceof RegionServerStoppedException) || e instanceof OutOfOrderScannerNextException) { // Pass // It is easier writing the if loop test as list of what is allowed rather than // as a list of what is not allowed... so if in here, it means we do not throw. } else { handler.onFailure(e); } } // Else, its signal from depths of ScannerCallable that we need to reset the scanner. if (lastResult != null) { // The region has moved. We need to open a brand new scanner at // the new location. // Reset the startRow to the row we've seen last so that the new // scanner starts at the correct row. Otherwise we may see previously // returned rows again. 
// (ScannerCallable by now has "relocated" the correct region) scan.setStartRow(lastResult.getRow()); byte[] newStart = new byte[lastResult.getRow().length + 1]; System.arraycopy(lastResult.getRow(), 0, newStart, 0, newStart.length - 1); newStart[newStart.length - 1] = 0; scan.setStartRow(newStart); } if (e instanceof OutOfOrderScannerNextException) { if (retryAfterOutOfOrderException) { retryAfterOutOfOrderException = false; } else { handler.onFailure(new DoNotRetryIOException("Failed after retry of " + "OutOfOrderScannerNextException: was there a rpc timeout?", e)); } } // Clear region. currentRegion = null; // Set this to zero so we don't try and do an rpc and close on remote server when // the exception we got was UnknownScanner or the Server is going down. callable = null; // This continue will take us to while at end of loop where we will set up new scanner. tryAgain(null); } }