@Test public void testResourceUpdateOnRunningNode() { RMNodeImpl node = getRunningNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.RUNNING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType()); }
@Test public void testResourceUpdateOnNewNode() { RMNodeImpl node = getNewNode(Resource.newInstance(4096, 4, 4)); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.NEW, node.getState()); }
@Test public void testResourceUpdateOnRebootedNode() { RMNodeImpl node = getRebootedNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); assertEquals("GPU resource is not match.", newCapacity.getGpuCores(), 2); Assert.assertEquals(NodeState.REBOOTED, node.getState()); }
public Map<NodeId, ResourceOption> getNodeResourceMap() { String[] nodes = getNodes(); Map<NodeId, ResourceOption> resourceOptions = new HashMap<NodeId, ResourceOption> (); for (String node : nodes) { NodeId nid = ConverterUtils.toNodeId(node); int vcores = getVcoresPerNode(node); int memory = getMemoryPerNode(node); int overCommitTimeout = getOverCommitTimeoutPerNode(node); Resource resource = Resources.createResource(memory, vcores); ResourceOption resourceOption = ResourceOption.newInstance(resource, overCommitTimeout); resourceOptions.put(nid, resourceOption); } return resourceOptions; }
@Test public void testResourceUpdateOnRunningNode() { RMNodeImpl node = getRunningNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.RUNNING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType()); }
@Test public void testResourceUpdateOnRebootedNode() { RMNodeImpl node = getRebootedNode(); ClusterMetrics cm = ClusterMetrics.getMetrics(); int initialActive = cm.getNumActiveNMs(); int initialUnHealthy = cm.getUnhealthyNMs(); int initialDecommissioning = cm.getNumDecommissioningNMs(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption .newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.REBOOTED, node.getState()); Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs()); Assert.assertEquals("Unhelathy Nodes", initialUnHealthy, cm.getUnhealthyNMs()); Assert.assertEquals("Decommissioning Nodes", initialDecommissioning, cm.getNumDecommissioningNMs()); }
private int updateNodeResource(String nodeIdStr, int memSize, int cores, int overCommitTimeout) throws IOException, YarnException { // check resource value first if (invalidResourceValue(memSize, cores)) { throw new IllegalArgumentException("Invalid resource value: " + "(" + memSize + "," + cores + ") for updateNodeResource."); } // Refresh the nodes ResourceManagerAdministrationProtocol adminProtocol = createAdminProtocol(); UpdateNodeResourceRequest request = recordFactory.newRecordInstance(UpdateNodeResourceRequest.class); NodeId nodeId = ConverterUtils.toNodeId(nodeIdStr); Resource resource = Resources.createResource(memSize, cores); Map<NodeId, ResourceOption> resourceMap = new HashMap<NodeId, ResourceOption>(); resourceMap.put( nodeId, ResourceOption.newInstance(resource, overCommitTimeout)); request.setNodeResourceMap(resourceMap); adminProtocol.updateNodeResource(request); return 0; }
@Test(timeout=500) public void testUpdateNodeResource() throws Exception { String nodeIdStr = "0.0.0.0:0"; int memSize = 2048; int cores = 2; String[] args = { "-updateNodeResource", nodeIdStr, Integer.toString(memSize), Integer.toString(cores) }; assertEquals(0, rmAdminCLI.run(args)); ArgumentCaptor<UpdateNodeResourceRequest> argument = ArgumentCaptor.forClass(UpdateNodeResourceRequest.class); verify(admin).updateNodeResource(argument.capture()); UpdateNodeResourceRequest request = argument.getValue(); Map<NodeId, ResourceOption> resourceMap = request.getNodeResourceMap(); NodeId nodeId = ConverterUtils.toNodeId(nodeIdStr); Resource expectedResource = Resources.createResource(memSize, cores); ResourceOption resource = resourceMap.get(nodeId); assertNotNull("resource for " + nodeIdStr + " shouldn't be null.", resource); assertEquals("resource value for " + nodeIdStr + " is not as expected.", ResourceOption.newInstance(expectedResource, ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), resource); }
@Test public void testResourceUpdateOnRunningNode() { RMNodeImpl node = getRunningNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.RUNNING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType()); }
public Map<NodeId, ResourceOption> getNodeResourceMap() { String[] nodes = getNodes(); Map<NodeId, ResourceOption> resourceOptions = new HashMap<NodeId, ResourceOption> (); for (String node : nodes) { NodeId nid = NodeId.fromString(node); int vcores = getVcoresPerNode(node); int memory = getMemoryPerNode(node); int overCommitTimeout = getOverCommitTimeoutPerNode(node); Resource resource = Resources.createResource(memory, vcores); ResourceOption resourceOption = ResourceOption.newInstance(resource, overCommitTimeout); resourceOptions.put(nid, resourceOption); } return resourceOptions; }
@Override public void transition(RMNodeImpl rmNode, RMNodeEvent event) { // Restore the original total capability if (rmNode.originalTotalCapability != null) { rmNode.totalCapability = rmNode.originalTotalCapability; rmNode.originalTotalCapability = null; } LOG.info("Node " + rmNode.nodeId + " in DECOMMISSIONING is " + "recommissioned back to RUNNING."); rmNode .updateMetricsForGracefulDecommission(rmNode.getState(), finalState); //update the scheduler with the restored original total capability rmNode.context .getDispatcher() .getEventHandler() .handle( new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption .newInstance(rmNode.totalCapability, 0))); }
@Test public void testResourceUpdateOnRunningNode() { RMNodeImpl node = getRunningNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemorySize(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemorySize(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.RUNNING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType()); }
@Test public void testResourceUpdateOnRebootedNode() { RMNodeImpl node = getRebootedNode(); ClusterMetrics cm = ClusterMetrics.getMetrics(); int initialActive = cm.getNumActiveNMs(); int initialUnHealthy = cm.getUnhealthyNMs(); int initialDecommissioning = cm.getNumDecommissioningNMs(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemorySize(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption .newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemorySize(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.REBOOTED, node.getState()); Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs()); Assert.assertEquals("Unhelathy Nodes", initialUnHealthy, cm.getUnhealthyNMs()); Assert.assertEquals("Decommissioning Nodes", initialDecommissioning, cm.getNumDecommissioningNMs()); }
@Test public void testResourceUpdateOnDecommissioningNode() { RMNodeImpl node = getDecommissioningNode(); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemorySize(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource originalCapacity = node.getOriginalTotalCapability(); assertEquals("Memory resource is not match.", originalCapacity.getMemorySize(), oldCapacity.getMemorySize()); assertEquals("CPU resource is not match.", originalCapacity.getVirtualCores(), oldCapacity.getVirtualCores()); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemorySize(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState()); Assert.assertNotNull(nodesListManagerEvent); Assert.assertEquals(NodesListManagerEventType.NODE_USABLE, nodesListManagerEvent.getType()); }
private int updateNodeResource(String nodeIdStr, int memSize, int cores, int overCommitTimeout) throws IOException, YarnException { // check resource value first if (invalidResourceValue(memSize, cores)) { throw new IllegalArgumentException("Invalid resource value: " + "(" + memSize + "," + cores + ") for updateNodeResource."); } // Refresh the nodes ResourceManagerAdministrationProtocol adminProtocol = createAdminProtocol(); UpdateNodeResourceRequest request = recordFactory.newRecordInstance(UpdateNodeResourceRequest.class); NodeId nodeId = NodeId.fromString(nodeIdStr); Resource resource = Resources.createResource(memSize, cores); Map<NodeId, ResourceOption> resourceMap = new HashMap<NodeId, ResourceOption>(); resourceMap.put( nodeId, ResourceOption.newInstance(resource, overCommitTimeout)); request.setNodeResourceMap(resourceMap); adminProtocol.updateNodeResource(request); return 0; }
private int handleUpdateNodeResource(String[] args, String cmd, boolean isHAEnabled) throws NumberFormatException, IOException, YarnException { int i = 1; if (args.length < 4 || args.length > 5) { System.err.println("Number of parameters specified for " + "updateNodeResource is wrong."); printUsage(cmd, isHAEnabled); return -1; } else { String nodeID = args[i++]; String memSize = args[i++]; String cores = args[i++]; int overCommitTimeout = ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT; if (i == args.length - 1) { overCommitTimeout = Integer.parseInt(args[i]); } return updateNodeResource(nodeID, Integer.parseInt(memSize), Integer.parseInt(cores), overCommitTimeout); } }
@Test public void testUpdateNodeResource() throws Exception { String nodeIdStr = "0.0.0.0:0"; int memSize = 2048; int cores = 2; String[] args = { "-updateNodeResource", nodeIdStr, Integer.toString(memSize), Integer.toString(cores) }; assertEquals(0, rmAdminCLI.run(args)); ArgumentCaptor<UpdateNodeResourceRequest> argument = ArgumentCaptor.forClass(UpdateNodeResourceRequest.class); verify(admin).updateNodeResource(argument.capture()); UpdateNodeResourceRequest request = argument.getValue(); Map<NodeId, ResourceOption> resourceMap = request.getNodeResourceMap(); NodeId nodeId = NodeId.fromString(nodeIdStr); Resource expectedResource = Resources.createResource(memSize, cores); ResourceOption resource = resourceMap.get(nodeId); assertNotNull("resource for " + nodeIdStr + " shouldn't be null.", resource); assertEquals("resource value for " + nodeIdStr + " is not as expected.", ResourceOption.newInstance(expectedResource, ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), resource); }
public RMNodeImpl(NodeId nodeId, RMContext context, String hostName, int cmPort, int httpPort, Node node, ResourceOption resourceOption, String nodeManagerVersion) { this.nodeId = nodeId; this.context = context; this.hostName = hostName; this.commandPort = cmPort; this.httpPort = httpPort; this.resourceOption = resourceOption; this.nodeAddress = hostName + ":" + cmPort; this.httpAddress = hostName + ":" + httpPort; this.node = node; this.healthReport = "Healthy"; this.lastHealthReportTime = System.currentTimeMillis(); this.nodeManagerVersion = nodeManagerVersion; this.latestNodeHeartBeatResponse.setResponseId(0); ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); this.readLock = lock.readLock(); this.writeLock = lock.writeLock(); this.stateMachine = stateMachineFactory.make(this); this.nodeUpdateQueue = new ConcurrentLinkedQueue<UpdatedContainerInfo>(); }
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr, int hostnum, String hostName, int port) { final String rackName = "rack"+ rack; final int nid = hostnum; final String nodeAddr = hostName + ":" + nid; if (hostName == null) { hostName = "host"+ nid; } final NodeId nodeID = NodeId.newInstance(hostName, port); final String httpAddress = httpAddr; String healthReport = (state == NodeState.UNHEALTHY) ? null : "HealthyMe"; return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, ResourceOption.newInstance(perNode, RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), rackName, healthReport, 0, nid, hostName, state); }
@Override public void setNodeResourceMap(Map<NodeId, ResourceOption> nodeResourceMap) { if (nodeResourceMap == null) { return; } initNodeResourceMap(); this.nodeResourceMap.clear(); this.nodeResourceMap.putAll(nodeResourceMap); }
private void initNodeResourceMap() { if (this.nodeResourceMap != null) { return; } UpdateNodeResourceRequestProtoOrBuilder p = viaProto ? proto : builder; List<NodeResourceMapProto> list = p.getNodeResourceMapList(); this.nodeResourceMap = new HashMap<NodeId, ResourceOption>(list .size()); for (NodeResourceMapProto nodeResourceProto : list) { this.nodeResourceMap.put(convertFromProtoFormat(nodeResourceProto.getNodeId()), convertFromProtoFormat(nodeResourceProto.getResourceOption())); } }
/** * Process resource update on a node and update Queue. */ @Override public synchronized void updateNodeResource(RMNode nm, ResourceOption resourceOption) { super.updateNodeResource(nm, resourceOption); updateRootQueueMetrics(); queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); }
/** * Process resource update on a node. */ public synchronized void updateNodeResource(RMNode nm, ResourceOption resourceOption) { SchedulerNode node = getSchedulerNode(nm.getNodeID()); Resource newResource = resourceOption.getResource(); Resource oldResource = node.getTotalResource(); if(!oldResource.equals(newResource)) { // Notify NodeLabelsManager about this change rmContext.getNodeLabelManager().updateNodeResource(nm.getNodeID(), newResource); // Log resource change LOG.info("Update resource on node: " + node.getNodeName() + " from: " + oldResource + ", to: " + newResource); nodes.remove(nm.getNodeID()); updateMaximumAllocation(node, false); // update resource to node node.setTotalResource(newResource); nodes.put(nm.getNodeID(), (N)node); updateMaximumAllocation(node, true); // update resource to clusterResource Resources.subtractFrom(clusterResource, oldResource); Resources.addTo(clusterResource, newResource); } else { // Log resource change LOG.warn("Update resource on node: " + node.getNodeName() + " with the same resource: " + newResource); } }
/** * Process resource update on a node. */ private synchronized void updateNodeAndQueueResource(RMNode nm, ResourceOption resourceOption) { updateNodeResource(nm, resourceOption); root.updateClusterResource(clusterResource, new ResourceLimits( clusterResource)); }
@Public @Evolving public static UpdateNodeResourceRequest newInstance( Map<NodeId, ResourceOption> nodeResourceMap) { UpdateNodeResourceRequest request = Records.newRecord(UpdateNodeResourceRequest.class); request.setNodeResourceMap(nodeResourceMap); return request; }
@Test public void testResourceUpdateOnNewNode() { RMNodeImpl node = getNewNode(Resource.newInstance(4096, 4)); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), ResourceOption.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.NEW, node.getState()); }
private static void updateNodeResourceFromEvent(RMNodeImpl rmNode, RMNodeResourceUpdateEvent event){ ResourceOption resourceOption = event.getResourceOption(); // Set resource on RMNode rmNode.totalCapability = resourceOption.getResource(); }
/** * Process resource update on a node. */ public synchronized void updateNodeResource(RMNode nm, ResourceOption resourceOption) { SchedulerNode node = getSchedulerNode(nm.getNodeID()); Resource newResource = resourceOption.getResource(); Resource oldResource = node.getTotalResource(); if(!oldResource.equals(newResource)) { // Log resource change LOG.info("Update resource on node: " + node.getNodeName() + " from: " + oldResource + ", to: " + newResource); nodes.remove(nm.getNodeID()); updateMaximumAllocation(node, false); // update resource to node node.setTotalResource(newResource); nodes.put(nm.getNodeID(), (N)node); updateMaximumAllocation(node, true); // update resource to clusterResource Resources.subtractFrom(clusterResource, oldResource); Resources.addTo(clusterResource, newResource); } else { // Log resource change LOG.warn("Update resource on node: " + node.getNodeName() + " with the same resource: " + newResource); } }
@Test public void testResourceUpdateOnNewNode() { RMNodeImpl node = getNewNode(Resource.newInstance(4096, 4)); Resource oldCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", oldCapacity.getMemory(), 4096); assertEquals("CPU resource is not match.", oldCapacity.getVirtualCores(), 4); node.handle(new RMNodeResourceUpdateEvent(node.getNodeID(), ResourceOption.newInstance(Resource.newInstance(2048, 2), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT))); Resource newCapacity = node.getTotalCapability(); assertEquals("Memory resource is not match.", newCapacity.getMemory(), 2048); assertEquals("CPU resource is not match.", newCapacity.getVirtualCores(), 2); Assert.assertEquals(NodeState.NEW, node.getState()); }