001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertTrue; 022 023import java.io.IOException; 024import java.util.Collection; 025import java.util.List; 026import java.util.concurrent.CountDownLatch; 027import java.util.concurrent.atomic.AtomicInteger; 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.fs.FileSystem; 030import org.apache.hadoop.fs.Path; 031import org.apache.hadoop.hbase.client.Admin; 032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 033import org.apache.hadoop.hbase.client.RegionInfo; 034import org.apache.hadoop.hbase.client.Table; 035import org.apache.hadoop.hbase.client.TableDescriptor; 036import org.apache.hadoop.hbase.regionserver.CompactingMemStore; 037import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy; 038import org.apache.hadoop.hbase.regionserver.HRegion; 039import org.apache.hadoop.hbase.regionserver.HRegionServer; 040import org.apache.hadoop.hbase.regionserver.HStore; 041import org.apache.hadoop.hbase.regionserver.HStoreFile; 042import org.apache.hadoop.hbase.regionserver.Region; 043import org.apache.hadoop.hbase.regionserver.RegionServerServices; 044import org.apache.hadoop.hbase.regionserver.Store; 045import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 046import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController; 047import org.apache.hadoop.hbase.regionserver.wal.WALUtil; 048import org.apache.hadoop.hbase.security.User; 049import org.apache.hadoop.hbase.testclassification.LargeTests; 050import org.apache.hadoop.hbase.testclassification.MiscTests; 051import org.apache.hadoop.hbase.util.Bytes; 052import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 053import org.apache.hadoop.hbase.wal.WAL; 054import org.junit.ClassRule; 055import org.junit.Test; 056import org.junit.experimental.categories.Category; 057import org.slf4j.Logger; 058import org.slf4j.LoggerFactory; 059 060import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 061 062import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 063import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor; 064 065/** 066 * Test for the case where a regionserver going down has enough cycles to do damage to regions that 067 * have actually been assigned elsehwere. 068 * <p> 069 * If we happen to assign a region before it fully done with in its old location -- i.e. it is on 070 * two servers at the same time -- all can work fine until the case where the region on the dying 071 * server decides to compact or otherwise change the region file set. The region in its new location 072 * will then get a surprise when it tries to do something w/ a file removed by the region in its old 073 * location on dying server. 074 * <p> 075 * Making a test for this case is a little tough in that even if a file is deleted up on the 076 * namenode, if the file was opened before the delete, it will continue to let reads happen until 077 * something changes the state of cached blocks in the dfsclient that was already open (a block from 078 * the deleted file is cleaned from the datanode by NN). 079 * <p> 080 * What we will do below is do an explicit check for existence on the files listed in the region 081 * that has had some files removed because of a compaction. This sort of hurry's along and makes 082 * certain what is a chance occurance. 083 */ 084@Category({MiscTests.class, LargeTests.class}) 085public class TestIOFencing { 086 087 @ClassRule 088 public static final HBaseClassTestRule CLASS_RULE = 089 HBaseClassTestRule.forClass(TestIOFencing.class); 090 091 private static final Logger LOG = LoggerFactory.getLogger(TestIOFencing.class); 092 static { 093 // Uncomment the following lines if more verbosity is needed for 094 // debugging (see HBASE-12285 for details). 095 //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL); 096 //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); 097 //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); 098 //((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem")) 099 // .getLogger().setLevel(Level.ALL); 100 //((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); 101 } 102 103 public abstract static class CompactionBlockerRegion extends HRegion { 104 AtomicInteger compactCount = new AtomicInteger(); 105 volatile CountDownLatch compactionsBlocked = new CountDownLatch(0); 106 volatile CountDownLatch compactionsWaiting = new CountDownLatch(0); 107 108 @SuppressWarnings("deprecation") 109 public CompactionBlockerRegion(Path tableDir, WAL log, 110 FileSystem fs, Configuration confParam, RegionInfo info, 111 TableDescriptor htd, RegionServerServices rsServices) { 112 super(tableDir, log, fs, confParam, info, htd, rsServices); 113 } 114 115 public void stopCompactions() { 116 compactionsBlocked = new CountDownLatch(1); 117 compactionsWaiting = new CountDownLatch(1); 118 } 119 120 public void allowCompactions() { 121 LOG.debug("allowing compactions"); 122 compactionsBlocked.countDown(); 123 } 124 public void waitForCompactionToBlock() throws IOException { 125 try { 126 LOG.debug("waiting for compaction to block"); 127 compactionsWaiting.await(); 128 LOG.debug("compaction block reached"); 129 } catch (InterruptedException ex) { 130 throw new IOException(ex); 131 } 132 } 133 134 @Override 135 public boolean compact(CompactionContext compaction, HStore store, 136 ThroughputController throughputController) throws IOException { 137 try { 138 return super.compact(compaction, store, throughputController); 139 } finally { 140 compactCount.getAndIncrement(); 141 } 142 } 143 144 @Override 145 public boolean compact(CompactionContext compaction, HStore store, 146 ThroughputController throughputController, User user) throws IOException { 147 try { 148 return super.compact(compaction, store, throughputController, user); 149 } finally { 150 compactCount.getAndIncrement(); 151 } 152 } 153 154 public int countStoreFiles() { 155 int count = 0; 156 for (HStore store : stores.values()) { 157 count += store.getStorefilesCount(); 158 } 159 return count; 160 } 161 } 162 163 /** 164 * An override of HRegion that allows us park compactions in a holding pattern and 165 * then when appropriate for the test, allow them proceed again. 166 */ 167 public static class BlockCompactionsInPrepRegion extends CompactionBlockerRegion { 168 169 public BlockCompactionsInPrepRegion(Path tableDir, WAL log, 170 FileSystem fs, Configuration confParam, RegionInfo info, 171 TableDescriptor htd, RegionServerServices rsServices) { 172 super(tableDir, log, fs, confParam, info, htd, rsServices); 173 } 174 @Override 175 protected void doRegionCompactionPrep() throws IOException { 176 compactionsWaiting.countDown(); 177 try { 178 compactionsBlocked.await(); 179 } catch (InterruptedException ex) { 180 throw new IOException(); 181 } 182 super.doRegionCompactionPrep(); 183 } 184 } 185 186 /** 187 * An override of HRegion that allows us park compactions in a holding pattern and 188 * then when appropriate for the test, allow them proceed again. This allows the compaction 189 * entry to go the WAL before blocking, but blocks afterwards 190 */ 191 public static class BlockCompactionsInCompletionRegion extends CompactionBlockerRegion { 192 public BlockCompactionsInCompletionRegion(Path tableDir, WAL log, 193 FileSystem fs, Configuration confParam, RegionInfo info, 194 TableDescriptor htd, RegionServerServices rsServices) { 195 super(tableDir, log, fs, confParam, info, htd, rsServices); 196 } 197 @Override 198 protected HStore instantiateHStore(final ColumnFamilyDescriptor family) throws IOException { 199 return new BlockCompactionsInCompletionHStore(this, family, this.conf); 200 } 201 } 202 203 public static class BlockCompactionsInCompletionHStore extends HStore { 204 CompactionBlockerRegion r; 205 protected BlockCompactionsInCompletionHStore(HRegion region, ColumnFamilyDescriptor family, 206 Configuration confParam) throws IOException { 207 super(region, family, confParam); 208 r = (CompactionBlockerRegion) region; 209 } 210 211 @Override 212 protected void completeCompaction(Collection<HStoreFile> compactedFiles) throws IOException { 213 try { 214 r.compactionsWaiting.countDown(); 215 r.compactionsBlocked.await(); 216 } catch (InterruptedException ex) { 217 throw new IOException(ex); 218 } 219 super.completeCompaction(compactedFiles); 220 } 221 } 222 223 private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 224 private final static TableName TABLE_NAME = 225 TableName.valueOf("tabletest"); 226 private final static byte[] FAMILY = Bytes.toBytes("family"); 227 private static final int FIRST_BATCH_COUNT = 4000; 228 private static final int SECOND_BATCH_COUNT = FIRST_BATCH_COUNT; 229 230 /** 231 * Test that puts up a regionserver, starts a compaction on a loaded region but holds the 232 * compaction until after we have killed the server and the region has come up on 233 * a new regionserver altogether. This fakes the double assignment case where region in one 234 * location changes the files out from underneath a region being served elsewhere. 235 */ 236 @Test 237 public void testFencingAroundCompaction() throws Exception { 238 for(MemoryCompactionPolicy policy : MemoryCompactionPolicy.values()) { 239 doTest(BlockCompactionsInPrepRegion.class, policy); 240 } 241 } 242 243 /** 244 * Test that puts up a regionserver, starts a compaction on a loaded region but holds the 245 * compaction completion until after we have killed the server and the region has come up on 246 * a new regionserver altogether. This fakes the double assignment case where region in one 247 * location changes the files out from underneath a region being served elsewhere. 248 */ 249 @Test 250 public void testFencingAroundCompactionAfterWALSync() throws Exception { 251 for(MemoryCompactionPolicy policy : MemoryCompactionPolicy.values()) { 252 doTest(BlockCompactionsInCompletionRegion.class, policy); 253 } 254 } 255 256 public void doTest(Class<?> regionClass, MemoryCompactionPolicy policy) throws Exception { 257 Configuration c = TEST_UTIL.getConfiguration(); 258 // Insert our custom region 259 c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class); 260 // Encourage plenty of flushes 261 c.setLong("hbase.hregion.memstore.flush.size", 25000); 262 c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName()); 263 // Only run compaction when we tell it to 264 c.setInt("hbase.hstore.compaction.min",1); 265 c.setInt("hbase.hstore.compactionThreshold", 1000); 266 c.setLong("hbase.hstore.blockingStoreFiles", 1000); 267 // Compact quickly after we tell it to! 268 c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000); 269 c.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(policy)); 270 LOG.info("Starting mini cluster"); 271 TEST_UTIL.startMiniCluster(1); 272 CompactionBlockerRegion compactingRegion = null; 273 Admin admin = null; 274 try { 275 LOG.info("Creating admin"); 276 admin = TEST_UTIL.getConnection().getAdmin(); 277 LOG.info("Creating table"); 278 TEST_UTIL.createTable(TABLE_NAME, FAMILY); 279 Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME); 280 LOG.info("Loading test table"); 281 // Find the region 282 List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME); 283 assertEquals(1, testRegions.size()); 284 compactingRegion = (CompactionBlockerRegion)testRegions.get(0); 285 LOG.info("Blocking compactions"); 286 compactingRegion.stopCompactions(); 287 long lastFlushTime = compactingRegion.getEarliestFlushTimeForAllStores(); 288 // Load some rows 289 TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT); 290 291 // add a compaction from an older (non-existing) region to see whether we successfully skip 292 // those entries 293 HRegionInfo oldHri = new HRegionInfo(table.getName(), 294 HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW); 295 CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri, 296 FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")), 297 new Path("store_dir")); 298 WALUtil.writeCompactionMarker(compactingRegion.getWAL(), 299 ((HRegion)compactingRegion).getReplicationScope(), 300 oldHri, compactionDescriptor, compactingRegion.getMVCC()); 301 302 // Wait till flush has happened, otherwise there won't be multiple store files 303 long startWaitTime = System.currentTimeMillis(); 304 while (compactingRegion.getEarliestFlushTimeForAllStores() <= lastFlushTime || 305 compactingRegion.countStoreFiles() <= 1) { 306 LOG.info("Waiting for the region to flush " + 307 compactingRegion.getRegionInfo().getRegionNameAsString()); 308 Thread.sleep(1000); 309 admin.flush(table.getName()); 310 assertTrue("Timed out waiting for the region to flush", 311 System.currentTimeMillis() - startWaitTime < 30000); 312 } 313 assertTrue(compactingRegion.countStoreFiles() > 1); 314 final byte REGION_NAME[] = compactingRegion.getRegionInfo().getRegionName(); 315 LOG.info("Asking for compaction"); 316 admin.majorCompact(TABLE_NAME); 317 LOG.info("Waiting for compaction to be about to start"); 318 compactingRegion.waitForCompactionToBlock(); 319 LOG.info("Starting a new server"); 320 RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer(); 321 final HRegionServer newServer = newServerThread.getRegionServer(); 322 LOG.info("Killing region server ZK lease"); 323 TEST_UTIL.expireRegionServerSession(0); 324 CompactionBlockerRegion newRegion = null; 325 startWaitTime = System.currentTimeMillis(); 326 LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME)); 327 328 // wait for region to be assigned and to go out of log replay if applicable 329 Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() { 330 @Override 331 public boolean evaluate() throws Exception { 332 Region newRegion = newServer.getOnlineRegion(REGION_NAME); 333 return newRegion != null; 334 } 335 }); 336 337 newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME); 338 339 // After compaction of old region finishes on the server that was going down, make sure that 340 // all the files we expect are still working when region is up in new location. 341 FileSystem fs = newRegion.getFilesystem(); 342 for (String f: newRegion.getStoreFileList(new byte [][] {FAMILY})) { 343 assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f))); 344 } 345 LOG.info("Allowing compaction to proceed"); 346 compactingRegion.allowCompactions(); 347 while (compactingRegion.compactCount.get() == 0) { 348 Thread.sleep(1000); 349 } 350 // The server we killed stays up until the compaction that was started before it was killed 351 // completes. In logs you should see the old regionserver now going down. 352 LOG.info("Compaction finished"); 353 354 // If we survive the split keep going... 355 // Now we make sure that the region isn't totally confused. Load up more rows. 356 TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT, 357 FIRST_BATCH_COUNT + SECOND_BATCH_COUNT); 358 admin.majorCompact(TABLE_NAME); 359 startWaitTime = System.currentTimeMillis(); 360 while (newRegion.compactCount.get() == 0) { 361 Thread.sleep(1000); 362 assertTrue("New region never compacted", 363 System.currentTimeMillis() - startWaitTime < 180000); 364 } 365 int count; 366 for (int i = 0;; i++) { 367 try { 368 count = TEST_UTIL.countRows(table); 369 break; 370 } catch (DoNotRetryIOException e) { 371 // wait up to 30s 372 if (i >= 30 || !e.getMessage().contains("File does not exist")) { 373 throw e; 374 } 375 Thread.sleep(1000); 376 } 377 } 378 if (policy == MemoryCompactionPolicy.EAGER || policy == MemoryCompactionPolicy.ADAPTIVE) { 379 assertTrue(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT >= count); 380 } else { 381 assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, count); 382 } 383 } finally { 384 if (compactingRegion != null) { 385 compactingRegion.allowCompactions(); 386 } 387 admin.close(); 388 TEST_UTIL.shutdownMiniCluster(); 389 } 390 } 391}