void getDataSetWatchFailure(String path) { synchronized (grabTaskLock) { if (workerInGrabTask) { // currentTask can change but that's ok String taskpath = currentTask; if (taskpath != null && taskpath.equals(path)) { LOG.info("retrying data watch on " + path); SplitLogCounters.tot_wkr_get_data_retry.incrementAndGet(); getDataSetWatchAsync(); } else { // no point setting a watch on the task which this worker is not // working upon anymore } } } }
private void deleteNodeSuccess(String path) { if (ignoreZKDeleteForTesting) { return; } Task task; task = details.getTasks().remove(path); if (task == null) { if (ZKSplitLog.isRescanNode(watcher, path)) { SplitLogCounters.tot_mgr_rescan_deleted.incrementAndGet(); } SplitLogCounters.tot_mgr_missing_state_in_delete.incrementAndGet(); LOG.debug("deleted task without in memory state " + path); return; } synchronized (task) { task.status = DELETED; task.notify(); } SplitLogCounters.tot_mgr_task_deleted.incrementAndGet(); }
private void heartbeat(String path, int new_version, ServerName workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTime(), new_version, workerName); SplitLogCounters.tot_mgr_heartbeat.incrementAndGet(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
@Ignore("DLR is broken by HBASE-12751") @Test(timeout=60000) public void testGetPreviousRecoveryMode() throws Exception { LOG.info("testGetPreviousRecoveryMode"); SplitLogCounters.resetCounters(); // Not actually enabling DLR for the cluster, just for the ZkCoordinatedStateManager to use. // The test is just manipulating ZK manually anyways. conf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "testRecovery"), new SplitLogTask.Unassigned( ServerName.valueOf("mgr,1,1"), RecoveryMode.LOG_SPLITTING).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); slm = new SplitLogManager(ds, conf, stopper, master, DUMMY_MASTER); LOG.info("Mode1=" + slm.getRecoveryMode()); assertTrue(slm.isLogSplitting()); zkw.getRecoverableZooKeeper().delete(ZKSplitLog.getEncodedNodeName(zkw, "testRecovery"), -1); LOG.info("Mode2=" + slm.getRecoveryMode()); slm.setRecoveryMode(false); LOG.info("Mode3=" + slm.getRecoveryMode()); assertTrue("Mode4=" + slm.getRecoveryMode(), slm.isLogReplaying()); }
@Before public void setup() throws Exception { TEST_UTIL.startMiniZKCluster(); Configuration conf = TEST_UTIL.getConfiguration(); zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), "split-log-worker-tests", null); ds = new DummyServer(zkw, conf); ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode); ZKUtil.createAndFailSilent(zkw, zkw.baseZNode); assertThat(ZKUtil.checkExists(zkw, zkw.baseZNode), not (is(-1))); LOG.debug(zkw.baseZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode); assertThat(ZKUtil.checkExists(zkw, zkw.splitLogZNode), not (is(-1))); LOG.debug(zkw.splitLogZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.rsZNode); assertThat(ZKUtil.checkExists(zkw, zkw.rsZNode), not (is(-1))); SplitLogCounters.resetCounters(); executorService = new ExecutorService("TestSplitLogWorker"); executorService.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, 10); this.mode = (conf.getBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, false) ? RecoveryMode.LOG_REPLAY : RecoveryMode.LOG_SPLITTING); }
@Test(timeout=60000) public void testAcquireTaskAtStartup() throws Exception { LOG.info("testAcquireTaskAtStartup"); SplitLogCounters.resetCounters(); final String TATAS = "tatas"; final ServerName RS = ServerName.valueOf("rs,1,1"); RegionServerServices mockedRS = getRegionServer(RS); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS), new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1"), this.mode).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); SplitLogWorker slw = new SplitLogWorker(ds, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask); slw.start(); try { waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME); byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS)); SplitLogTask slt = SplitLogTask.parseFrom(bytes); assertTrue(slt.isOwned(RS)); } finally { stopSplitLogWorker(slw); } }
@Test(timeout=60000) public void testGetPreviousRecoveryMode() throws Exception { LOG.info("testGetPreviousRecoveryMode"); SplitLogCounters.resetCounters(); Configuration testConf = HBaseConfiguration.create(TEST_UTIL.getConfiguration()); testConf.setBoolean(HConstants.DISTRIBUTED_LOG_REPLAY_KEY, true); ds = new DummyServer(zkw, testConf); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "testRecovery"), new SplitLogTask.Unassigned( ServerName.valueOf("mgr,1,1"), RecoveryMode.LOG_SPLITTING).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); slm = new SplitLogManager(ds, testConf, stopper, master, DUMMY_MASTER); LOG.info("Mode1=" + slm.getRecoveryMode()); assertTrue(slm.isLogSplitting()); zkw.getRecoverableZooKeeper().delete(ZKSplitLog.getEncodedNodeName(zkw, "testRecovery"), -1); LOG.info("Mode2=" + slm.getRecoveryMode()); slm.setRecoveryMode(false); LOG.info("Mode3=" + slm.getRecoveryMode()); assertTrue("Mode4=" + slm.getRecoveryMode(), slm.isLogReplaying()); }
private void startCluster(int num_rs) throws Exception { SplitLogCounters.resetCounters(); LOG.info("Starting cluster"); conf.getLong("hbase.splitlog.max.resubmit", 0); // Make the failure test faster conf.setInt("zookeeper.recovery.retry", 0); conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1); conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing conf.setInt("hbase.regionserver.wal.max.splitters", 3); TEST_UTIL.shutdownMiniHBaseCluster(); TEST_UTIL = new HBaseTestingUtility(conf); TEST_UTIL.setDFSCluster(dfsCluster); TEST_UTIL.setZkCluster(zkCluster); TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs); cluster = TEST_UTIL.getHBaseCluster(); LOG.info("Waiting for active/ready master"); cluster.waitForActiveAndReadyMaster(); master = cluster.getMaster(); while (cluster.getLiveRegionServerThreads().size() < num_rs) { Threads.sleep(1); } }
private void heartbeat(String path, int new_version, ServerName workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTimeMillis(), new_version, workerName); SplitLogCounters.tot_mgr_heartbeat.incrementAndGet(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
private void deleteNodeSuccess(String path) { if (ignoreZKDeleteForTesting) { return; } Task task; task = tasks.remove(path); if (task == null) { if (ZKSplitLog.isRescanNode(watcher, path)) { SplitLogCounters.tot_mgr_rescan_deleted.incrementAndGet(); } SplitLogCounters.tot_mgr_missing_state_in_delete.incrementAndGet(); LOG.debug("deleted task without in memory state " + path); return; } synchronized (task) { task.status = DELETED; task.notify(); } SplitLogCounters.tot_mgr_task_deleted.incrementAndGet(); }
private void startCluster(int num_rs) throws Exception { SplitLogCounters.resetCounters(); LOG.info("Starting cluster"); conf.getLong("hbase.splitlog.max.resubmit", 0); // Make the failure test faster conf.setInt("zookeeper.recovery.retry", 0); conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1); conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing conf.setInt("hbase.regionserver.wal.max.splitters", 3); TEST_UTIL = new HBaseTestingUtility(conf); TEST_UTIL.setDFSCluster(dfsCluster); TEST_UTIL.setZkCluster(zkCluster); TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, num_rs); cluster = TEST_UTIL.getHBaseCluster(); LOG.info("Waiting for active/ready master"); cluster.waitForActiveAndReadyMaster(); master = cluster.getMaster(); while (cluster.getLiveRegionServerThreads().size() < num_rs) { Threads.sleep(1); } }
@Before public void setup() throws Exception { TEST_UTIL.startMiniZKCluster(); zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), "split-log-worker-tests", null); ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode); ZKUtil.createAndFailSilent(zkw, zkw.baseZNode); assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1); LOG.debug(zkw.baseZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode); assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1); LOG.debug(zkw.splitLogZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.rsZNode); assertTrue(ZKUtil.checkExists(zkw, zkw.rsZNode) != -1); SplitLogCounters.resetCounters(); executorService = new ExecutorService("TestSplitLogWorker"); executorService.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, 10); }
@Test(timeout=60000) public void testAcquireTaskAtStartup() throws Exception { LOG.info("testAcquireTaskAtStartup"); SplitLogCounters.resetCounters(); final String TATAS = "tatas"; final ServerName RS = ServerName.valueOf("rs,1,1"); RegionServerServices mockedRS = getRegionServer(RS); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS), new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask); slw.start(); try { waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME); byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS)); SplitLogTask slt = SplitLogTask.parseFrom(bytes); assertTrue(slt.isOwned(RS)); } finally { stopSplitLogWorker(slw); } }
void getDataSetWatchFailure(String path) { synchronized (grabTaskLock) { if (workerInGrabTask) { // currentTask can change but that's ok String taskpath = currentTask; if (taskpath != null && taskpath.equals(path)) { LOG.info("retrying data watch on " + path); SplitLogCounters.tot_wkr_get_data_retry.increment(); getDataSetWatchAsync(); } else { // no point setting a watch on the task which this worker is not // working upon anymore } } } }
private void deleteNodeSuccess(String path) { if (ignoreZKDeleteForTesting) { return; } Task task; task = details.getTasks().remove(path); if (task == null) { if (ZKSplitLog.isRescanNode(watcher, path)) { SplitLogCounters.tot_mgr_rescan_deleted.increment(); } SplitLogCounters.tot_mgr_missing_state_in_delete.increment(); LOG.debug("Deleted task without in memory state " + path); return; } synchronized (task) { task.status = DELETED; task.notify(); } SplitLogCounters.tot_mgr_task_deleted.increment(); }
private void heartbeat(String path, int new_version, ServerName workerName) { Task task = findOrCreateOrphanTask(path); if (new_version != task.last_version) { if (task.isUnassigned()) { LOG.info("Task " + path + " acquired by " + workerName); } task.heartbeat(EnvironmentEdgeManager.currentTime(), new_version, workerName); SplitLogCounters.tot_mgr_heartbeat.increment(); } else { // duplicate heartbeats - heartbeats w/o zk node version // changing - are possible. The timeout thread does // getDataSetWatch() just to check whether a node still // exists or not } return; }
private void startCluster(int numRS) throws Exception { SplitLogCounters.resetCounters(); LOG.info("Starting cluster"); conf.setLong("hbase.splitlog.max.resubmit", 0); // Make the failure test faster conf.setInt("zookeeper.recovery.retry", 0); conf.setInt(HConstants.REGIONSERVER_INFO_PORT, -1); conf.setFloat(HConstants.LOAD_BALANCER_SLOP_KEY, (float) 100.0); // no load balancing conf.setInt("hbase.regionserver.wal.max.splitters", 3); conf.setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10); conf.set("hbase.wal.provider", getWalProvider()); TEST_UTIL.startMiniHBaseCluster(NUM_MASTERS, numRS); cluster = TEST_UTIL.getHBaseCluster(); LOG.info("Waiting for active/ready master"); cluster.waitForActiveAndReadyMaster(); master = cluster.getMaster(); TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() { @Override public boolean evaluate() throws Exception { return cluster.getLiveRegionServerThreads().size() >= numRS; } }); }
@Before public void setup() throws Exception { TEST_UTIL.startMiniZKCluster(); Configuration conf = TEST_UTIL.getConfiguration(); zkw = new ZKWatcher(TEST_UTIL.getConfiguration(), "split-log-worker-tests", null); ds = new DummyServer(zkw, conf); ZKUtil.deleteChildrenRecursively(zkw, zkw.znodePaths.baseZNode); ZKUtil.createAndFailSilent(zkw, zkw.znodePaths.baseZNode); assertThat(ZKUtil.checkExists(zkw, zkw.znodePaths.baseZNode), not (is(-1))); LOG.debug(zkw.znodePaths.baseZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.znodePaths.splitLogZNode); assertThat(ZKUtil.checkExists(zkw, zkw.znodePaths.splitLogZNode), not (is(-1))); LOG.debug(zkw.znodePaths.splitLogZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.znodePaths.rsZNode); assertThat(ZKUtil.checkExists(zkw, zkw.znodePaths.rsZNode), not (is(-1))); SplitLogCounters.resetCounters(); executorService = new ExecutorService("TestSplitLogWorker"); executorService.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, 10); }
@Test(timeout=60000) public void testAcquireTaskAtStartup() throws Exception { LOG.info("testAcquireTaskAtStartup"); SplitLogCounters.resetCounters(); final String TATAS = "tatas"; final ServerName RS = ServerName.valueOf("rs,1,1"); RegionServerServices mockedRS = getRegionServer(RS); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS), new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); SplitLogWorker slw = new SplitLogWorker(ds, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask); slw.start(); try { waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, WAIT_TIME); byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS)); SplitLogTask slt = SplitLogTask.parseFrom(bytes); assertTrue(slt.isOwned(RS)); } finally { stopSplitLogWorker(slw); } }
@Test public void testAcquireTaskAtStartup() throws Exception { LOG.info("testAcquireTaskAtStartup"); SplitLogCounters.resetCounters(); final String TATAS = "tatas"; final ServerName RS = ServerName.valueOf("rs,1,1"); RegionServerServices mockedRS = getRegionServer(RS); zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, TATAS), new SplitLogTask.Unassigned(ServerName.valueOf("mgr,1,1")).toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(), mockedRS, neverEndingTask); slw.start(); try { waitForCounter(SplitLogCounters.tot_wkr_task_acquired, 0, 1, 1500); byte [] bytes = ZKUtil.getData(zkw, ZKSplitLog.getEncodedNodeName(zkw, TATAS)); SplitLogTask slt = SplitLogTask.parseFrom(bytes); assertTrue(slt.isOwned(RS)); } finally { stopSplitLogWorker(slw); } }