/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.List;
import java.util.Optional;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.coprocessor.CoreCoprocessor;
import org.apache.hadoop.hbase.coprocessor.HasRegionServerServices;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionServerObserver;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests around regionserver shutdown and abort
 */
@Category({RegionServerTests.class, MediumTests.class})
public class TestRegionServerAbort {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestRegionServerAbort.class);

  private static final byte[] FAMILY_BYTES = Bytes.toBytes("f");

  private static final Logger LOG = LoggerFactory.getLogger(TestRegionServerAbort.class);

  private HBaseTestingUtility testUtil;
  private Configuration conf;
  private MiniDFSCluster dfsCluster;
  private MiniHBaseCluster cluster;

  /**
   * Starts a mini ZK, DFS (2 datanodes) and HBase cluster with
   * {@link StopBlockingRegionObserver} registered as both the regionserver and the region
   * coprocessor, and {@link ErrorThrowingHRegion} configured as the region implementation.
   */
  @Before
  public void setup() throws Exception {
    testUtil = new HBaseTestingUtility();
    conf = testUtil.getConfiguration();

    String observerClass = StopBlockingRegionObserver.class.getName();
    conf.set(CoprocessorHost.REGIONSERVER_COPROCESSOR_CONF_KEY, observerClass);
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, observerClass);

    String hundredKb = Long.toString(100 * 1024);
    // make sure we have multiple blocks so that the client does not prefetch all block locations
    conf.set("dfs.blocksize", hundredKb);
    // prefetch the first block
    conf.set(DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY, hundredKb);
    conf.set(HConstants.REGION_IMPL, ErrorThrowingHRegion.class.getName());

    testUtil.startMiniZKCluster();
    dfsCluster = testUtil.startMiniDFSCluster(2);
    cluster = testUtil.startMiniHBaseCluster(1, 2);
  }

  /**
   * Lifts the stop veto on every regionserver (and on the master, which exposes a regionserver
   * coprocessor host as well) and then shuts the mini cluster down.
   * <p>
   * Every lookup is null-checked and the shutdown sits in a {@code finally} block, so that a
   * partially started cluster or a host without the observer cannot prevent
   * {@code shutdownMiniCluster()} from being called.
   */
  @After
  public void tearDown() throws Exception {
    try {
      // cluster stays null when setup() failed before startMiniHBaseCluster() returned
      if (cluster != null) {
        for (JVMClusterUtil.RegionServerThread rsThread : cluster.getRegionServerThreads()) {
          HRegionServer rs = rsThread.getRegionServer();
          if (rs != null) {
            allowStop(rs.getRegionServerCoprocessorHost());
          }
        }
        HMaster master = cluster.getMaster();
        if (master != null) {
          allowStop(master.getRegionServerCoprocessorHost());
        }
      }
    } finally {
      testUtil.shutdownMiniCluster();
    }
  }

  /**
   * Marks the {@link StopBlockingRegionObserver} loaded in {@code host}, if any, as allowing stop.
   *
   * @param host regionserver coprocessor host to look the observer up in; may be {@code null}
   */
  private static void allowStop(RegionServerCoprocessorHost host) {
    if (host == null) {
      return;
    }
    StopBlockingRegionObserver observer = (StopBlockingRegionObserver) host
        .findCoprocessor(StopBlockingRegionObserver.class.getName());
    if (observer != null) {
      observer.setStopAllowed(true);
    }
  }

  /**
   * Test that a regionserver is able to abort properly, even when a coprocessor
   * throws an exception in preStopRegionServer().
   */
  @Test
  public void testAbortFromRPC() throws Exception {
    TableName tableName = TableName.valueOf("testAbortFromRPC");
    // create a test table; closed when the test body completes
    try (Table table = testUtil.createTable(tableName, FAMILY_BYTES)) {
      // write some edits
      testUtil.loadTable(table, FAMILY_BYTES);
      LOG.info("Wrote data");
      // force a flush
      cluster.flushcache(tableName);
      LOG.info("Flushed table");

      // Send a poisoned put to trigger the abort
      Put put = new Put(new byte[]{0, 0, 0, 0});
      put.addColumn(FAMILY_BYTES, Bytes.toBytes("c"), new byte[]{});
      put.setAttribute(StopBlockingRegionObserver.DO_ABORT, new byte[]{1});

      // grab the region before the put so we still hold a handle to its server afterwards
      List<HRegion> regions = cluster.findRegionsForTable(tableName);
      assertFalse(regions.isEmpty());
      HRegion firstRegion = regions.get(0);
      table.put(put);

      // Verify that the regionserver is stopped
      assertNotNull(firstRegion);
      RegionServerServices services = firstRegion.getRegionServerServices();
      assertNotNull(services);
      LOG.info("isAborted = {}", services.isAborted());
      assertTrue(services.isAborted());
      LOG.info("isStopped = {}", services.isStopped());
      assertTrue(services.isStopped());
    }
  }

  /**
   * Test that a coprocessor is able to override a normal regionserver stop request.
   */
  @Test
  public void testStopOverrideFromCoprocessor() throws Exception {
    Admin admin = testUtil.getHBaseAdmin();
    HRegionServer regionserver = cluster.getRegionServer(0);
    admin.stopRegionServer(regionserver.getServerName().getHostAndPort());

    // regionserver should have failed to stop due to coprocessor
    assertFalse(cluster.getRegionServer(0).isAborted());
    assertFalse(cluster.getRegionServer(0).isStopped());
  }

  /**
   * Coprocessor that serves as both region observer and regionserver observer. As a regionserver
   * observer it rejects stop requests until {@link #setStopAllowed(boolean)} is called with
   * {@code true}; as a region observer it aborts the hosting regionserver when it sees a put
   * carrying the {@link #DO_ABORT} attribute.
   */
  @CoreCoprocessor
  public static class StopBlockingRegionObserver
      implements RegionServerCoprocessor, RegionCoprocessor, RegionServerObserver, RegionObserver {
    public static final String DO_ABORT = "DO_ABORT";

    // volatile: flipped by the test thread in tearDown(), read inside the preStopRegionServer hook
    private volatile boolean stopAllowed;

    @Override
    public Optional<RegionObserver> getRegionObserver() {
      return Optional.of(this);
    }

    @Override
    public Optional<RegionServerObserver> getRegionServerObserver() {
      return Optional.of(this);
    }

    /**
     * Aborts the hosting regionserver if {@code put} carries the {@link #DO_ABORT} attribute;
     * any other put passes through untouched.
     */
    @Override
    public void prePut(ObserverContext<RegionCoprocessorEnvironment> c, Put put, WALEdit edit,
        Durability durability) throws IOException {
      if (put.getAttribute(DO_ABORT) == null) {
        return;
      }
      // TODO: Change this so it throws a CP Abort Exception instead.
      RegionServerServices rss =
          ((HasRegionServerServices) c.getEnvironment()).getRegionServerServices();
      String str = "Aborting for test";
      LOG.info("{} {}", str, rss.getServerName());
      rss.abort(str, new Throwable(str));
    }

    /**
     * Vetoes the stop request unless stopping has been explicitly allowed.
     *
     * @throws IOException if stopping has not been allowed via {@link #setStopAllowed(boolean)}
     */
    @Override
    public void preStopRegionServer(ObserverContext<RegionServerCoprocessorEnvironment> env)
        throws IOException {
      if (!stopAllowed) {
        throw new IOException("Stop not allowed");
      }
    }

    /**
     * @param allowed {@code true} to let subsequent stop requests through
     */
    public void setStopAllowed(boolean allowed) {
      this.stopAllowed = allowed;
    }
  }

  /**
   * Throws an exception during store file refresh in order to trigger a regionserver abort.
   * Installed as the region implementation through {@link HConstants#REGION_IMPL} in
   * {@link #setup()}.
   */
  public static class ErrorThrowingHRegion extends HRegion {
    public ErrorThrowingHRegion(Path tableDir, WAL wal, FileSystem fs, Configuration confParam,
        RegionInfo regionInfo, TableDescriptor htd, RegionServerServices rsServices) {
      super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices);
    }

    public ErrorThrowingHRegion(HRegionFileSystem fs, WAL wal, Configuration confParam,
        TableDescriptor htd, RegionServerServices rsServices) {
      super(fs, wal, confParam, htd, rsServices);
    }

    /**
     * Fails every forced refresh and delegates non-forced refreshes to {@link HRegion}.
     *
     * @throws IOException always when {@code force} is {@code true}
     */
    @Override
    protected boolean refreshStoreFiles(boolean force) throws IOException {
      // forced when called through RegionScannerImpl.handleFileNotFound()
      if (force) {
        throw new IOException("Failing file refresh for testing");
      }
      return super.refreshStoreFiles(force);
    }
  }
}