001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import java.io.Closeable; 021import java.io.IOException; 022import org.apache.hadoop.conf.Configurable; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.client.RegionInfoBuilder; 025import org.apache.hadoop.hbase.util.Threads; 026import org.apache.yetus.audience.InterfaceAudience; 027import org.slf4j.Logger; 028import org.slf4j.LoggerFactory; 029 030import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService; 031import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService; 032import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService; 033 034/** 035 * This class defines methods that can help with managing HBase clusters 036 * from unit tests and system tests. There are 3 types of cluster deployments: 037 * <ul> 038 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads, 039 * used by unit tests</li> 040 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can 041 * interact with the cluster. </li> 042 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate 043 * JVMs. </li> 044 * </ul> 045 * <p> 046 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can 047 * be run against a mini-cluster during unit test execution, or a distributed cluster having 048 * tens/hundreds of nodes during execution of integration tests. 049 * 050 * <p> 051 * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume 052 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster, 053 * and some tests will still need to mock stuff and introspect internal state. For those use 054 * cases from unit tests, or if more control is needed, you can use the subclasses directly. 055 * In that sense, this class does not abstract away <strong>every</strong> interface that 056 * MiniHBaseCluster or DistributedHBaseCluster provide. 057 */ 058@InterfaceAudience.Private 059public abstract class HBaseCluster implements Closeable, Configurable { 060 // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope 061 static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class.getName()); 062 protected Configuration conf; 063 064 /** the status of the cluster before we begin */ 065 protected ClusterMetrics initialClusterStatus; 066 067 /** 068 * Construct an HBaseCluster 069 * @param conf Configuration to be used for cluster 070 */ 071 public HBaseCluster(Configuration conf) { 072 setConf(conf); 073 } 074 075 @Override 076 public void setConf(Configuration conf) { 077 this.conf = conf; 078 } 079 080 @Override 081 public Configuration getConf() { 082 return conf; 083 } 084 085 /** 086 * Returns a ClusterMetrics for this HBase cluster. 087 * @see #getInitialClusterMetrics() 088 */ 089 public abstract ClusterMetrics getClusterMetrics() throws IOException; 090 091 /** 092 * Returns a ClusterStatus for this HBase cluster as observed at the 093 * starting of the HBaseCluster 094 */ 095 public ClusterMetrics getInitialClusterMetrics() throws IOException { 096 return initialClusterStatus; 097 } 098 099 /** 100 * Returns an {@link MasterService.BlockingInterface} to the active master 101 */ 102 public abstract MasterService.BlockingInterface getMasterAdminService() 103 throws IOException; 104 105 /** 106 * Returns an AdminProtocol interface to the regionserver 107 */ 108 public abstract AdminService.BlockingInterface getAdminProtocol(ServerName serverName) 109 throws IOException; 110 111 /** 112 * Returns a ClientProtocol interface to the regionserver 113 */ 114 public abstract ClientService.BlockingInterface getClientProtocol(ServerName serverName) 115 throws IOException; 116 117 /** 118 * Starts a new region server on the given hostname or if this is a mini/local cluster, 119 * starts a region server locally. 120 * @param hostname the hostname to start the regionserver on 121 * @throws IOException if something goes wrong 122 */ 123 public abstract void startRegionServer(String hostname, int port) throws IOException; 124 125 /** 126 * Kills the region server process if this is a distributed cluster, otherwise 127 * this causes the region server to exit doing basic clean up only. 128 * @throws IOException if something goes wrong 129 */ 130 public abstract void killRegionServer(ServerName serverName) throws IOException; 131 132 /** 133 * Keeping track of killed servers and being able to check if a particular server was killed makes 134 * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete 135 * example of such case is - killing servers and waiting for all regions of a particular table 136 * to be assigned. We can check for server column in META table and that its value is not one 137 * of the killed servers. 138 */ 139 public abstract boolean isKilledRS(ServerName serverName); 140 141 /** 142 * Stops the given region server, by attempting a gradual stop. 143 * @return whether the operation finished with success 144 * @throws IOException if something goes wrong 145 */ 146 public abstract void stopRegionServer(ServerName serverName) throws IOException; 147 148 /** 149 * Wait for the specified region server to join the cluster 150 * @return whether the operation finished with success 151 * @throws IOException if something goes wrong or timeout occurs 152 */ 153 public void waitForRegionServerToStart(String hostname, int port, long timeout) 154 throws IOException { 155 long start = System.currentTimeMillis(); 156 while ((System.currentTimeMillis() - start) < timeout) { 157 for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) { 158 if (server.getHostname().equals(hostname) && server.getPort() == port) { 159 return; 160 } 161 } 162 Threads.sleep(100); 163 } 164 throw new IOException("did timeout " + timeout + "ms waiting for region server to start: " 165 + hostname); 166 } 167 168 /** 169 * Wait for the specified region server to stop the thread / process. 170 * @return whether the operation finished with success 171 * @throws IOException if something goes wrong or timeout occurs 172 */ 173 public abstract void waitForRegionServerToStop(ServerName serverName, long timeout) 174 throws IOException; 175 176 /** 177 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, 178 * silently logs warning message. 179 * @param hostname the hostname to start the regionserver on 180 * @throws IOException if something goes wrong 181 */ 182 public abstract void startZkNode(String hostname, int port) throws IOException; 183 184 /** 185 * Kills the zookeeper node process if this is a distributed cluster, otherwise, 186 * this causes master to exit doing basic clean up only. 187 * @throws IOException if something goes wrong 188 */ 189 public abstract void killZkNode(ServerName serverName) throws IOException; 190 191 /** 192 * Stops the region zookeeper if this is a distributed cluster, otherwise 193 * silently logs warning message. 194 * @throws IOException if something goes wrong 195 */ 196 public abstract void stopZkNode(ServerName serverName) throws IOException; 197 198 /** 199 * Wait for the specified zookeeper node to join the cluster 200 * @return whether the operation finished with success 201 * @throws IOException if something goes wrong or timeout occurs 202 */ 203 public abstract void waitForZkNodeToStart(ServerName serverName, long timeout) 204 throws IOException; 205 206 /** 207 * Wait for the specified zookeeper node to stop the thread / process. 208 * @return whether the operation finished with success 209 * @throws IOException if something goes wrong or timeout occurs 210 */ 211 public abstract void waitForZkNodeToStop(ServerName serverName, long timeout) 212 throws IOException; 213 214 /** 215 * Starts a new datanode on the given hostname or if this is a mini/local cluster, 216 * silently logs warning message. 217 * @throws IOException if something goes wrong 218 */ 219 public abstract void startDataNode(ServerName serverName) throws IOException; 220 221 /** 222 * Kills the datanode process if this is a distributed cluster, otherwise, 223 * this causes master to exit doing basic clean up only. 224 * @throws IOException if something goes wrong 225 */ 226 public abstract void killDataNode(ServerName serverName) throws IOException; 227 228 /** 229 * Stops the datanode if this is a distributed cluster, otherwise 230 * silently logs warning message. 231 * @throws IOException if something goes wrong 232 */ 233 public abstract void stopDataNode(ServerName serverName) throws IOException; 234 235 /** 236 * Wait for the specified datanode to join the cluster 237 * @return whether the operation finished with success 238 * @throws IOException if something goes wrong or timeout occurs 239 */ 240 public abstract void waitForDataNodeToStart(ServerName serverName, long timeout) 241 throws IOException; 242 243 /** 244 * Wait for the specified datanode to stop the thread / process. 245 * @return whether the operation finished with success 246 * @throws IOException if something goes wrong or timeout occurs 247 */ 248 public abstract void waitForDataNodeToStop(ServerName serverName, long timeout) 249 throws IOException; 250 251 /** 252 * Starts a new master on the given hostname or if this is a mini/local cluster, 253 * starts a master locally. 254 * @param hostname the hostname to start the master on 255 * @return whether the operation finished with success 256 * @throws IOException if something goes wrong 257 */ 258 public abstract void startMaster(String hostname, int port) throws IOException; 259 260 /** 261 * Kills the master process if this is a distributed cluster, otherwise, 262 * this causes master to exit doing basic clean up only. 263 * @throws IOException if something goes wrong 264 */ 265 public abstract void killMaster(ServerName serverName) throws IOException; 266 267 /** 268 * Stops the given master, by attempting a gradual stop. 269 * @throws IOException if something goes wrong 270 */ 271 public abstract void stopMaster(ServerName serverName) throws IOException; 272 273 /** 274 * Wait for the specified master to stop the thread / process. 275 * @throws IOException if something goes wrong or timeout occurs 276 */ 277 public abstract void waitForMasterToStop(ServerName serverName, long timeout) 278 throws IOException; 279 280 /** 281 * Blocks until there is an active master and that master has completed 282 * initialization. 283 * 284 * @return true if an active master becomes available. false if there are no 285 * masters left. 286 * @throws IOException if something goes wrong or timeout occurs 287 */ 288 public boolean waitForActiveAndReadyMaster() 289 throws IOException { 290 return waitForActiveAndReadyMaster(Long.MAX_VALUE); 291 } 292 293 /** 294 * Blocks until there is an active master and that master has completed 295 * initialization. 296 * @param timeout the timeout limit in ms 297 * @return true if an active master becomes available. false if there are no 298 * masters left. 299 */ 300 public abstract boolean waitForActiveAndReadyMaster(long timeout) 301 throws IOException; 302 303 /** 304 * Wait for HBase Cluster to shut down. 305 */ 306 public abstract void waitUntilShutDown() throws IOException; 307 308 /** 309 * Shut down the HBase cluster 310 */ 311 public abstract void shutdown() throws IOException; 312 313 /** 314 * Restores the cluster to it's initial state if this is a real cluster, 315 * otherwise does nothing. 316 * This is a best effort restore. If the servers are not reachable, or insufficient 317 * permissions, etc. restoration might be partial. 318 * @return whether restoration is complete 319 */ 320 public boolean restoreInitialStatus() throws IOException { 321 return restoreClusterMetrics(getInitialClusterMetrics()); 322 } 323 324 /** 325 * Restores the cluster to given state if this is a real cluster, 326 * otherwise does nothing. 327 * This is a best effort restore. If the servers are not reachable, or insufficient 328 * permissions, etc. restoration might be partial. 329 * @return whether restoration is complete 330 */ 331 public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException { 332 return true; 333 } 334 335 /** 336 * Get the ServerName of region server serving the first hbase:meta region 337 */ 338 public ServerName getServerHoldingMeta() throws IOException { 339 return getServerHoldingRegion(TableName.META_TABLE_NAME, 340 RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName()); 341 } 342 343 /** 344 * Get the ServerName of region server serving the specified region 345 * @param regionName Name of the region in bytes 346 * @param tn Table name that has the region. 347 * @return ServerName that hosts the region or null 348 */ 349 public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName) 350 throws IOException; 351 352 /** 353 * @return whether we are interacting with a distributed cluster as opposed to an 354 * in-process mini/local cluster. 355 */ 356 public boolean isDistributedCluster() { 357 return false; 358 } 359 360 /** 361 * Closes all the resources held open for this cluster. Note that this call does not shutdown 362 * the cluster. 363 * @see #shutdown() 364 */ 365 @Override 366 public abstract void close() throws IOException; 367 368 /** 369 * Wait for the namenode. 370 * 371 * @throws InterruptedException 372 */ 373 public void waitForNamenodeAvailable() throws InterruptedException { 374 } 375 376 public void waitForDatanodesRegistered(int nbDN) throws Exception { 377 } 378}