001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023import static org.junit.Assert.fail; 024 025import java.io.FileNotFoundException; 026import java.io.IOException; 027import java.lang.reflect.Method; 028import java.security.PrivilegedExceptionAction; 029import java.util.ArrayList; 030import java.util.Arrays; 031import java.util.Collections; 032import java.util.HashMap; 033import java.util.HashSet; 034import java.util.List; 035import java.util.Map; 036import java.util.NavigableSet; 037import java.util.Objects; 038import java.util.Set; 039import java.util.concurrent.atomic.AtomicBoolean; 040import java.util.concurrent.atomic.AtomicInteger; 041import java.util.concurrent.atomic.AtomicLong; 042import java.util.stream.Collectors; 043import org.apache.hadoop.conf.Configuration; 044import org.apache.hadoop.fs.FSDataInputStream; 045import org.apache.hadoop.fs.FSDataOutputStream; 046import org.apache.hadoop.fs.FileStatus; 047import org.apache.hadoop.fs.FileSystem; 048import org.apache.hadoop.fs.FileUtil; 049import org.apache.hadoop.fs.Path; 050import org.apache.hadoop.fs.PathFilter; 051import org.apache.hadoop.hbase.Cell; 052import org.apache.hadoop.hbase.HBaseClassTestRule; 053import org.apache.hadoop.hbase.HBaseConfiguration; 054import org.apache.hadoop.hbase.HBaseTestingUtility; 055import org.apache.hadoop.hbase.HConstants; 056import org.apache.hadoop.hbase.KeyValue; 057import org.apache.hadoop.hbase.ServerName; 058import org.apache.hadoop.hbase.TableName; 059import org.apache.hadoop.hbase.client.RegionInfo; 060import org.apache.hadoop.hbase.client.RegionInfoBuilder; 061import org.apache.hadoop.hbase.regionserver.HRegion; 062import org.apache.hadoop.hbase.regionserver.wal.FaultyProtobufLogReader; 063import org.apache.hadoop.hbase.regionserver.wal.InstrumentedLogWriter; 064import org.apache.hadoop.hbase.regionserver.wal.ProtobufLogReader; 065import org.apache.hadoop.hbase.security.User; 066import org.apache.hadoop.hbase.testclassification.LargeTests; 067import org.apache.hadoop.hbase.testclassification.RegionServerTests; 068import org.apache.hadoop.hbase.util.Bytes; 069import org.apache.hadoop.hbase.util.CancelableProgressable; 070import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 071import org.apache.hadoop.hbase.util.FSUtils; 072import org.apache.hadoop.hbase.util.Threads; 073import org.apache.hadoop.hbase.wal.WAL.Entry; 074import org.apache.hadoop.hbase.wal.WAL.Reader; 075import org.apache.hadoop.hbase.wal.WALProvider.Writer; 076import org.apache.hadoop.hbase.wal.WALSplitter.CorruptedLogFileException; 077import org.apache.hadoop.hdfs.DFSTestUtil; 078import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException; 079import org.apache.hadoop.ipc.RemoteException; 080import org.junit.After; 081import org.junit.AfterClass; 082import org.junit.Before; 083import org.junit.BeforeClass; 084import org.junit.ClassRule; 085import org.junit.Rule; 086import org.junit.Test; 087import org.junit.experimental.categories.Category; 088import org.junit.rules.TestName; 089import org.mockito.Mockito; 090import org.mockito.invocation.InvocationOnMock; 091import org.mockito.stubbing.Answer; 092import org.slf4j.Logger; 093import org.slf4j.LoggerFactory; 094 095import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 096import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList; 097import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableMap; 098import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; 099 100import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 101import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; 102 103/** 104 * Testing {@link WAL} splitting code. 105 */ 106@Category({RegionServerTests.class, LargeTests.class}) 107public class TestWALSplit { 108 109 @ClassRule 110 public static final HBaseClassTestRule CLASS_RULE = 111 HBaseClassTestRule.forClass(TestWALSplit.class); 112 113 { 114 // Uncomment the following lines if more verbosity is needed for 115 // debugging (see HBASE-12285 for details). 116 //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); 117 //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); 118 //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL); 119 } 120 private final static Logger LOG = LoggerFactory.getLogger(TestWALSplit.class); 121 122 private static Configuration conf; 123 private FileSystem fs; 124 125 protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 126 127 private Path HBASEDIR; 128 private Path HBASELOGDIR; 129 private Path WALDIR; 130 private Path OLDLOGDIR; 131 private Path CORRUPTDIR; 132 private Path TABLEDIR; 133 private String TMPDIRNAME; 134 135 private static final int NUM_WRITERS = 10; 136 private static final int ENTRIES = 10; // entries per writer per region 137 138 private static final String FILENAME_BEING_SPLIT = "testfile"; 139 private static final TableName TABLE_NAME = 140 TableName.valueOf("t1"); 141 private static final byte[] FAMILY = Bytes.toBytes("f1"); 142 private static final byte[] QUALIFIER = Bytes.toBytes("q1"); 143 private static final byte[] VALUE = Bytes.toBytes("v1"); 144 private static final String WAL_FILE_PREFIX = "wal.dat."; 145 private static List<String> REGIONS = new ArrayList<>(); 146 private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors"; 147 private static String ROBBER; 148 private static String ZOMBIE; 149 private static String [] GROUP = new String [] {"supergroup"}; 150 151 static enum Corruptions { 152 INSERT_GARBAGE_ON_FIRST_LINE, 153 INSERT_GARBAGE_IN_THE_MIDDLE, 154 APPEND_GARBAGE, 155 TRUNCATE, 156 TRUNCATE_TRAILER 157 } 158 159 @BeforeClass 160 public static void setUpBeforeClass() throws Exception { 161 conf = TEST_UTIL.getConfiguration(); 162 conf.setClass("hbase.regionserver.hlog.writer.impl", 163 InstrumentedLogWriter.class, Writer.class); 164 // This is how you turn off shortcircuit read currently. TODO: Fix. Should read config. 165 System.setProperty("hbase.tests.use.shortcircuit.reads", "false"); 166 // Create fake maping user to group and set it to the conf. 167 Map<String, String []> u2g_map = new HashMap<>(2); 168 ROBBER = User.getCurrent().getName() + "-robber"; 169 ZOMBIE = User.getCurrent().getName() + "-zombie"; 170 u2g_map.put(ROBBER, GROUP); 171 u2g_map.put(ZOMBIE, GROUP); 172 DFSTestUtil.updateConfWithFakeGroupMapping(conf, u2g_map); 173 conf.setInt("dfs.heartbeat.interval", 1); 174 TEST_UTIL.startMiniDFSCluster(2); 175 } 176 177 @AfterClass 178 public static void tearDownAfterClass() throws Exception { 179 TEST_UTIL.shutdownMiniDFSCluster(); 180 } 181 182 @Rule 183 public TestName name = new TestName(); 184 private WALFactory wals = null; 185 186 @Before 187 public void setUp() throws Exception { 188 LOG.info("Cleaning up cluster for new test."); 189 fs = TEST_UTIL.getDFSCluster().getFileSystem(); 190 HBASEDIR = TEST_UTIL.createRootDir(); 191 HBASELOGDIR = TEST_UTIL.createWALRootDir(); 192 OLDLOGDIR = new Path(HBASELOGDIR, HConstants.HREGION_OLDLOGDIR_NAME); 193 CORRUPTDIR = new Path(HBASELOGDIR, HConstants.CORRUPT_DIR_NAME); 194 TABLEDIR = FSUtils.getTableDir(HBASEDIR, TABLE_NAME); 195 TMPDIRNAME = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY, 196 HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY); 197 REGIONS.clear(); 198 Collections.addAll(REGIONS, "bbb", "ccc"); 199 InstrumentedLogWriter.activateFailure = false; 200 wals = new WALFactory(conf, name.getMethodName()); 201 WALDIR = new Path(HBASELOGDIR, 202 AbstractFSWALProvider.getWALDirectoryName(ServerName.valueOf(name.getMethodName(), 203 16010, System.currentTimeMillis()).toString())); 204 //fs.mkdirs(WALDIR); 205 } 206 207 @After 208 public void tearDown() throws Exception { 209 try { 210 wals.close(); 211 } catch(IOException exception) { 212 // Some tests will move WALs out from under us. In those cases, we'll get an error on close. 213 LOG.info("Ignoring an error while closing down our WALFactory. Fine for some tests, but if" + 214 " you see a failure look here."); 215 LOG.debug("exception details", exception); 216 } finally { 217 wals = null; 218 fs.delete(HBASEDIR, true); 219 fs.delete(HBASELOGDIR, true); 220 } 221 } 222 223 /** 224 * Simulates splitting a WAL out from under a regionserver that is still trying to write it. 225 * Ensures we do not lose edits. 226 * @throws IOException 227 * @throws InterruptedException 228 */ 229 @Test 230 public void testLogCannotBeWrittenOnceParsed() throws IOException, InterruptedException { 231 final AtomicLong counter = new AtomicLong(0); 232 AtomicBoolean stop = new AtomicBoolean(false); 233 // Region we'll write edits too and then later examine to make sure they all made it in. 234 final String region = REGIONS.get(0); 235 final int numWriters = 3; 236 Thread zombie = new ZombieLastLogWriterRegionServer(counter, stop, region, numWriters); 237 try { 238 long startCount = counter.get(); 239 zombie.start(); 240 // Wait till writer starts going. 241 while (startCount == counter.get()) Threads.sleep(1); 242 // Give it a second to write a few appends. 243 Threads.sleep(1000); 244 final Configuration conf2 = HBaseConfiguration.create(conf); 245 final User robber = User.createUserForTesting(conf2, ROBBER, GROUP); 246 int count = robber.runAs(new PrivilegedExceptionAction<Integer>() { 247 @Override 248 public Integer run() throws Exception { 249 StringBuilder ls = new StringBuilder("Contents of WALDIR (").append(WALDIR) 250 .append("):\n"); 251 for (FileStatus status : fs.listStatus(WALDIR)) { 252 ls.append("\t").append(status.toString()).append("\n"); 253 } 254 LOG.debug(Objects.toString(ls)); 255 LOG.info("Splitting WALs out from under zombie. Expecting " + numWriters + " files."); 256 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf2, wals); 257 LOG.info("Finished splitting out from under zombie."); 258 Path[] logfiles = getLogForRegion(TABLE_NAME, region); 259 assertEquals("wrong number of split files for region", numWriters, logfiles.length); 260 int count = 0; 261 for (Path logfile: logfiles) { 262 count += countWAL(logfile); 263 } 264 return count; 265 } 266 }); 267 LOG.info("zombie=" + counter.get() + ", robber=" + count); 268 assertTrue("The log file could have at most 1 extra log entry, but can't have less. " + 269 "Zombie could write " + counter.get() + " and logfile had only " + count, 270 counter.get() == count || counter.get() + 1 == count); 271 } finally { 272 stop.set(true); 273 zombie.interrupt(); 274 Threads.threadDumpingIsAlive(zombie); 275 } 276 } 277 278 /** 279 * This thread will keep writing to a 'wal' file even after the split process has started. 280 * It simulates a region server that was considered dead but woke up and wrote some more to the 281 * last log entry. Does its writing as an alternate user in another filesystem instance to 282 * simulate better it being a regionserver. 283 */ 284 class ZombieLastLogWriterRegionServer extends Thread { 285 final AtomicLong editsCount; 286 final AtomicBoolean stop; 287 final int numOfWriters; 288 /** 289 * Region to write edits for. 290 */ 291 final String region; 292 final User user; 293 294 public ZombieLastLogWriterRegionServer(AtomicLong counter, AtomicBoolean stop, 295 final String region, final int writers) 296 throws IOException, InterruptedException { 297 super("ZombieLastLogWriterRegionServer"); 298 setDaemon(true); 299 this.stop = stop; 300 this.editsCount = counter; 301 this.region = region; 302 this.user = User.createUserForTesting(conf, ZOMBIE, GROUP); 303 numOfWriters = writers; 304 } 305 306 @Override 307 public void run() { 308 try { 309 doWriting(); 310 } catch (IOException e) { 311 LOG.warn(getName() + " Writer exiting " + e); 312 } catch (InterruptedException e) { 313 LOG.warn(getName() + " Writer exiting " + e); 314 } 315 } 316 317 private void doWriting() throws IOException, InterruptedException { 318 this.user.runAs(new PrivilegedExceptionAction<Object>() { 319 @Override 320 public Object run() throws Exception { 321 // Index of the WAL we want to keep open. generateWALs will leave open the WAL whose 322 // index we supply here. 323 int walToKeepOpen = numOfWriters - 1; 324 // The below method writes numOfWriters files each with ENTRIES entries for a total of 325 // numOfWriters * ENTRIES added per column family in the region. 326 Writer writer = null; 327 try { 328 writer = generateWALs(numOfWriters, ENTRIES, walToKeepOpen); 329 } catch (IOException e1) { 330 throw new RuntimeException("Failed", e1); 331 } 332 // Update counter so has all edits written so far. 333 editsCount.addAndGet(numOfWriters * ENTRIES); 334 loop(writer); 335 // If we've been interruped, then things should have shifted out from under us. 336 // closing should error 337 try { 338 writer.close(); 339 fail("Writing closing after parsing should give an error."); 340 } catch (IOException exception) { 341 LOG.debug("ignoring error when closing final writer.", exception); 342 } 343 return null; 344 } 345 }); 346 } 347 348 private void loop(final Writer writer) { 349 byte [] regionBytes = Bytes.toBytes(this.region); 350 while (!stop.get()) { 351 try { 352 long seq = appendEntry(writer, TABLE_NAME, regionBytes, 353 Bytes.toBytes("r" + editsCount.get()), regionBytes, QUALIFIER, VALUE, 0); 354 long count = editsCount.incrementAndGet(); 355 LOG.info(getName() + " sync count=" + count + ", seq=" + seq); 356 try { 357 Thread.sleep(1); 358 } catch (InterruptedException e) { 359 // 360 } 361 } catch (IOException ex) { 362 LOG.error(getName() + " ex " + ex.toString()); 363 if (ex instanceof RemoteException) { 364 LOG.error("Juliet: got RemoteException " + ex.getMessage() + 365 " while writing " + (editsCount.get() + 1)); 366 } else { 367 LOG.error(getName() + " failed to write....at " + editsCount.get()); 368 fail("Failed to write " + editsCount.get()); 369 } 370 break; 371 } catch (Throwable t) { 372 LOG.error(getName() + " HOW? " + t); 373 LOG.debug("exception details", t); 374 break; 375 } 376 } 377 LOG.info(getName() + " Writer exiting"); 378 } 379 } 380 381 /** 382 * {@see https://issues.apache.org/jira/browse/HBASE-3020} 383 */ 384 @Test 385 public void testRecoveredEditsPathForMeta() throws IOException { 386 byte[] encoded = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(); 387 Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME); 388 Path regiondir = new Path(tdir, 389 RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName()); 390 fs.mkdirs(regiondir); 391 long now = System.currentTimeMillis(); 392 Entry entry = 393 new Entry(new WALKeyImpl(encoded, 394 TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID), 395 new WALEdit()); 396 Path p = WALSplitter.getRegionSplitEditsPath(entry, 397 FILENAME_BEING_SPLIT, TMPDIRNAME, conf); 398 String parentOfParent = p.getParent().getParent().getName(); 399 assertEquals(parentOfParent, RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName()); 400 } 401 402 /** 403 * Test old recovered edits file doesn't break WALSplitter. 404 * This is useful in upgrading old instances. 405 */ 406 @Test 407 public void testOldRecoveredEditsFileSidelined() throws IOException { 408 byte [] encoded = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(); 409 Path tdir = FSUtils.getTableDir(HBASEDIR, TableName.META_TABLE_NAME); 410 Path regiondir = new Path(tdir, 411 RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName()); 412 fs.mkdirs(regiondir); 413 long now = System.currentTimeMillis(); 414 Entry entry = 415 new Entry(new WALKeyImpl(encoded, 416 TableName.META_TABLE_NAME, 1, now, HConstants.DEFAULT_CLUSTER_ID), 417 new WALEdit()); 418 Path parent = WALSplitter.getRegionDirRecoveredEditsDir(regiondir); 419 assertEquals(HConstants.RECOVERED_EDITS_DIR, parent.getName()); 420 fs.createNewFile(parent); // create a recovered.edits file 421 422 Path p = WALSplitter.getRegionSplitEditsPath(entry, 423 FILENAME_BEING_SPLIT, TMPDIRNAME, conf); 424 String parentOfParent = p.getParent().getParent().getName(); 425 assertEquals(parentOfParent, RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName()); 426 WALFactory.createRecoveredEditsWriter(fs, p, conf).close(); 427 } 428 429 private void useDifferentDFSClient() throws IOException { 430 // make fs act as a different client now 431 // initialize will create a new DFSClient with a new client ID 432 fs.initialize(fs.getUri(), conf); 433 } 434 435 @Test 436 public void testSplitPreservesEdits() throws IOException{ 437 final String REGION = "region__1"; 438 REGIONS.clear(); 439 REGIONS.add(REGION); 440 441 generateWALs(1, 10, -1, 0); 442 useDifferentDFSClient(); 443 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 444 Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath(); 445 Path[] splitLog = getLogForRegion(TABLE_NAME, REGION); 446 assertEquals(1, splitLog.length); 447 448 assertTrue("edits differ after split", logsAreEqual(originalLog, splitLog[0])); 449 } 450 451 @Test 452 public void testSplitRemovesRegionEventsEdits() throws IOException{ 453 final String REGION = "region__1"; 454 REGIONS.clear(); 455 REGIONS.add(REGION); 456 457 generateWALs(1, 10, -1, 100); 458 useDifferentDFSClient(); 459 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 460 Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath(); 461 Path[] splitLog = getLogForRegion(TABLE_NAME, REGION); 462 assertEquals(1, splitLog.length); 463 464 assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0])); 465 // split log should only have the test edits 466 assertEquals(10, countWAL(splitLog[0])); 467 } 468 469 470 @Test 471 public void testSplitLeavesCompactionEventsEdits() throws IOException{ 472 RegionInfo hri = RegionInfoBuilder.newBuilder(TABLE_NAME).build(); 473 REGIONS.clear(); 474 REGIONS.add(hri.getEncodedName()); 475 Path regionDir = new Path(FSUtils.getTableDir(HBASEDIR, TABLE_NAME), hri.getEncodedName()); 476 LOG.info("Creating region directory: " + regionDir); 477 assertTrue(fs.mkdirs(regionDir)); 478 479 Writer writer = generateWALs(1, 10, 0, 10); 480 String[] compactInputs = new String[]{"file1", "file2", "file3"}; 481 String compactOutput = "file4"; 482 appendCompactionEvent(writer, hri, compactInputs, compactOutput); 483 writer.close(); 484 485 useDifferentDFSClient(); 486 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 487 488 Path originalLog = (fs.listStatus(OLDLOGDIR))[0].getPath(); 489 // original log should have 10 test edits, 10 region markers, 1 compaction marker 490 assertEquals(21, countWAL(originalLog)); 491 492 Path[] splitLog = getLogForRegion(TABLE_NAME, hri.getEncodedName()); 493 assertEquals(1, splitLog.length); 494 495 assertFalse("edits differ after split", logsAreEqual(originalLog, splitLog[0])); 496 // split log should have 10 test edits plus 1 compaction marker 497 assertEquals(11, countWAL(splitLog[0])); 498 } 499 500 /** 501 * @param expectedEntries -1 to not assert 502 * @return the count across all regions 503 */ 504 private int splitAndCount(final int expectedFiles, final int expectedEntries) 505 throws IOException { 506 useDifferentDFSClient(); 507 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 508 int result = 0; 509 for (String region : REGIONS) { 510 Path[] logfiles = getLogForRegion(TABLE_NAME, region); 511 assertEquals(expectedFiles, logfiles.length); 512 int count = 0; 513 for (Path logfile: logfiles) { 514 count += countWAL(logfile); 515 } 516 if (-1 != expectedEntries) { 517 assertEquals(expectedEntries, count); 518 } 519 result += count; 520 } 521 return result; 522 } 523 524 @Test 525 public void testEmptyLogFiles() throws IOException { 526 testEmptyLogFiles(true); 527 } 528 529 @Test 530 public void testEmptyOpenLogFiles() throws IOException { 531 testEmptyLogFiles(false); 532 } 533 534 private void testEmptyLogFiles(final boolean close) throws IOException { 535 // we won't create the hlog dir until getWAL got called, so 536 // make dir here when testing empty log file 537 fs.mkdirs(WALDIR); 538 injectEmptyFile(".empty", close); 539 generateWALs(Integer.MAX_VALUE); 540 injectEmptyFile("empty", close); 541 splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); // skip 2 empty 542 } 543 544 @Test 545 public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException { 546 // generate logs but leave wal.dat.5 open. 547 generateWALs(5); 548 splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); 549 } 550 551 @Test 552 public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException { 553 conf.setBoolean(HBASE_SKIP_ERRORS, true); 554 generateWALs(Integer.MAX_VALUE); 555 corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"), 556 Corruptions.APPEND_GARBAGE, true); 557 splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); 558 } 559 560 @Test 561 public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException { 562 conf.setBoolean(HBASE_SKIP_ERRORS, true); 563 generateWALs(Integer.MAX_VALUE); 564 corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"), 565 Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true); 566 splitAndCount(NUM_WRITERS - 1, (NUM_WRITERS - 1) * ENTRIES); //1 corrupt 567 } 568 569 @Test 570 public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException { 571 conf.setBoolean(HBASE_SKIP_ERRORS, true); 572 generateWALs(Integer.MAX_VALUE); 573 corruptWAL(new Path(WALDIR, WAL_FILE_PREFIX + "5"), 574 Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false); 575 // the entries in the original logs are alternating regions 576 // considering the sequence file header, the middle corruption should 577 // affect at least half of the entries 578 int goodEntries = (NUM_WRITERS - 1) * ENTRIES; 579 int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1; 580 int allRegionsCount = splitAndCount(NUM_WRITERS, -1); 581 assertTrue("The file up to the corrupted area hasn't been parsed", 582 REGIONS.size() * (goodEntries + firstHalfEntries) <= allRegionsCount); 583 } 584 585 @Test 586 public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException { 587 conf.setBoolean(HBASE_SKIP_ERRORS, true); 588 List<FaultyProtobufLogReader.FailureType> failureTypes = Arrays 589 .asList(FaultyProtobufLogReader.FailureType.values()).stream() 590 .filter(x -> x != FaultyProtobufLogReader.FailureType.NONE).collect(Collectors.toList()); 591 for (FaultyProtobufLogReader.FailureType failureType : failureTypes) { 592 final Set<String> walDirContents = splitCorruptWALs(failureType); 593 final Set<String> archivedLogs = new HashSet<>(); 594 final StringBuilder archived = new StringBuilder("Archived logs in CORRUPTDIR:"); 595 for (FileStatus log : fs.listStatus(CORRUPTDIR)) { 596 archived.append("\n\t").append(log.toString()); 597 archivedLogs.add(log.getPath().getName()); 598 } 599 LOG.debug(archived.toString()); 600 assertEquals(failureType.name() + ": expected to find all of our wals corrupt.", archivedLogs, 601 walDirContents); 602 } 603 } 604 605 /** 606 * @return set of wal names present prior to split attempt. 607 * @throws IOException if the split process fails 608 */ 609 private Set<String> splitCorruptWALs(final FaultyProtobufLogReader.FailureType failureType) 610 throws IOException { 611 Class<?> backupClass = conf.getClass("hbase.regionserver.hlog.reader.impl", 612 Reader.class); 613 InstrumentedLogWriter.activateFailure = false; 614 615 try { 616 conf.setClass("hbase.regionserver.hlog.reader.impl", FaultyProtobufLogReader.class, 617 Reader.class); 618 conf.set("faultyprotobuflogreader.failuretype", failureType.name()); 619 // Clean up from previous tests or previous loop 620 try { 621 wals.shutdown(); 622 } catch (IOException exception) { 623 // since we're splitting out from under the factory, we should expect some closing failures. 624 LOG.debug("Ignoring problem closing WALFactory.", exception); 625 } 626 wals.close(); 627 try { 628 for (FileStatus log : fs.listStatus(CORRUPTDIR)) { 629 fs.delete(log.getPath(), true); 630 } 631 } catch (FileNotFoundException exception) { 632 LOG.debug("no previous CORRUPTDIR to clean."); 633 } 634 // change to the faulty reader 635 wals = new WALFactory(conf, name.getMethodName()); 636 generateWALs(-1); 637 // Our reader will render all of these files corrupt. 638 final Set<String> walDirContents = new HashSet<>(); 639 for (FileStatus status : fs.listStatus(WALDIR)) { 640 walDirContents.add(status.getPath().getName()); 641 } 642 useDifferentDFSClient(); 643 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 644 return walDirContents; 645 } finally { 646 conf.setClass("hbase.regionserver.hlog.reader.impl", backupClass, 647 Reader.class); 648 } 649 } 650 651 @Test (expected = IOException.class) 652 public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows() 653 throws IOException { 654 conf.setBoolean(HBASE_SKIP_ERRORS, false); 655 splitCorruptWALs(FaultyProtobufLogReader.FailureType.BEGINNING); 656 } 657 658 @Test 659 public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs() 660 throws IOException { 661 conf.setBoolean(HBASE_SKIP_ERRORS, false); 662 try { 663 splitCorruptWALs(FaultyProtobufLogReader.FailureType.BEGINNING); 664 } catch (IOException e) { 665 LOG.debug("split with 'skip errors' set to 'false' correctly threw"); 666 } 667 assertEquals("if skip.errors is false all files should remain in place", 668 NUM_WRITERS, fs.listStatus(WALDIR).length); 669 } 670 671 private void ignoreCorruption(final Corruptions corruption, final int entryCount, 672 final int expectedCount) throws IOException { 673 conf.setBoolean(HBASE_SKIP_ERRORS, false); 674 675 final String REGION = "region__1"; 676 REGIONS.clear(); 677 REGIONS.add(REGION); 678 679 Path c1 = new Path(WALDIR, WAL_FILE_PREFIX + "0"); 680 generateWALs(1, entryCount, -1, 0); 681 corruptWAL(c1, corruption, true); 682 683 useDifferentDFSClient(); 684 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 685 686 Path[] splitLog = getLogForRegion(TABLE_NAME, REGION); 687 assertEquals(1, splitLog.length); 688 689 int actualCount = 0; 690 Reader in = wals.createReader(fs, splitLog[0]); 691 @SuppressWarnings("unused") 692 Entry entry; 693 while ((entry = in.next()) != null) ++actualCount; 694 assertEquals(expectedCount, actualCount); 695 in.close(); 696 697 // should not have stored the EOF files as corrupt 698 FileStatus[] archivedLogs = fs.listStatus(CORRUPTDIR); 699 assertEquals(0, archivedLogs.length); 700 701 } 702 703 @Test 704 public void testEOFisIgnored() throws IOException { 705 int entryCount = 10; 706 ignoreCorruption(Corruptions.TRUNCATE, entryCount, entryCount-1); 707 } 708 709 @Test 710 public void testCorruptWALTrailer() throws IOException { 711 int entryCount = 10; 712 ignoreCorruption(Corruptions.TRUNCATE_TRAILER, entryCount, entryCount); 713 } 714 715 @Test 716 public void testLogsGetArchivedAfterSplit() throws IOException { 717 conf.setBoolean(HBASE_SKIP_ERRORS, false); 718 generateWALs(-1); 719 useDifferentDFSClient(); 720 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 721 FileStatus[] archivedLogs = fs.listStatus(OLDLOGDIR); 722 assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length); 723 } 724 725 @Test 726 public void testSplit() throws IOException { 727 generateWALs(-1); 728 splitAndCount(NUM_WRITERS, NUM_WRITERS * ENTRIES); 729 } 730 731 @Test 732 public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit() 733 throws IOException { 734 generateWALs(-1); 735 useDifferentDFSClient(); 736 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 737 FileStatus [] statuses = null; 738 try { 739 statuses = fs.listStatus(WALDIR); 740 if (statuses != null) { 741 fail("Files left in log dir: " + 742 Joiner.on(",").join(FileUtil.stat2Paths(statuses))); 743 } 744 } catch (FileNotFoundException e) { 745 // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null 746 } 747 } 748 749 @Test(expected = IOException.class) 750 public void testSplitWillFailIfWritingToRegionFails() throws Exception { 751 //leave 5th log open so we could append the "trap" 752 Writer writer = generateWALs(4); 753 useDifferentDFSClient(); 754 755 String region = "break"; 756 Path regiondir = new Path(TABLEDIR, region); 757 fs.mkdirs(regiondir); 758 759 InstrumentedLogWriter.activateFailure = false; 760 appendEntry(writer, TABLE_NAME, Bytes.toBytes(region), 761 Bytes.toBytes("r" + 999), FAMILY, QUALIFIER, VALUE, 0); 762 writer.close(); 763 764 try { 765 InstrumentedLogWriter.activateFailure = true; 766 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 767 } catch (IOException e) { 768 assertTrue(e.getMessage(). 769 contains("This exception is instrumented and should only be thrown for testing")); 770 throw e; 771 } finally { 772 InstrumentedLogWriter.activateFailure = false; 773 } 774 } 775 776 @Test 777 public void testSplitDeletedRegion() throws IOException { 778 REGIONS.clear(); 779 String region = "region_that_splits"; 780 REGIONS.add(region); 781 782 generateWALs(1); 783 useDifferentDFSClient(); 784 785 Path regiondir = new Path(TABLEDIR, region); 786 fs.delete(regiondir, true); 787 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 788 assertFalse(fs.exists(regiondir)); 789 } 790 791 @Test 792 public void testIOEOnOutputThread() throws Exception { 793 conf.setBoolean(HBASE_SKIP_ERRORS, false); 794 795 generateWALs(-1); 796 useDifferentDFSClient(); 797 FileStatus[] logfiles = fs.listStatus(WALDIR); 798 assertTrue("There should be some log file", 799 logfiles != null && logfiles.length > 0); 800 // wals with no entries (like the one we don't use in the factory) 801 // won't cause a failure since nothing will ever be written. 802 // pick the largest one since it's most likely to have entries. 803 int largestLogFile = 0; 804 long largestSize = 0; 805 for (int i = 0; i < logfiles.length; i++) { 806 if (logfiles[i].getLen() > largestSize) { 807 largestLogFile = i; 808 largestSize = logfiles[i].getLen(); 809 } 810 } 811 assertTrue("There should be some log greater than size 0.", 0 < largestSize); 812 // Set up a splitter that will throw an IOE on the output side 813 WALSplitter logSplitter = new WALSplitter(wals, conf, HBASEDIR, fs, null, null) { 814 @Override 815 protected Writer createWriter(Path logfile) throws IOException { 816 Writer mockWriter = Mockito.mock(Writer.class); 817 Mockito.doThrow(new IOException("Injected")).when( 818 mockWriter).append(Mockito.<Entry>any()); 819 return mockWriter; 820 } 821 }; 822 // Set up a background thread dumper. Needs a thread to depend on and then we need to run 823 // the thread dumping in a background thread so it does not hold up the test. 824 final AtomicBoolean stop = new AtomicBoolean(false); 825 final Thread someOldThread = new Thread("Some-old-thread") { 826 @Override 827 public void run() { 828 while(!stop.get()) Threads.sleep(10); 829 } 830 }; 831 someOldThread.setDaemon(true); 832 someOldThread.start(); 833 final Thread t = new Thread("Background-thread-dumper") { 834 @Override 835 public void run() { 836 try { 837 Threads.threadDumpingIsAlive(someOldThread); 838 } catch (InterruptedException e) { 839 e.printStackTrace(); 840 } 841 } 842 }; 843 t.setDaemon(true); 844 t.start(); 845 try { 846 logSplitter.splitLogFile(logfiles[largestLogFile], null); 847 fail("Didn't throw!"); 848 } catch (IOException ioe) { 849 assertTrue(ioe.toString().contains("Injected")); 850 } finally { 851 // Setting this to true will turn off the background thread dumper. 852 stop.set(true); 853 } 854 } 855 856 /** 857 * @param spiedFs should be instrumented for failure. 858 */ 859 private void retryOverHdfsProblem(final FileSystem spiedFs) throws Exception { 860 generateWALs(-1); 861 useDifferentDFSClient(); 862 863 try { 864 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, spiedFs, conf, wals); 865 assertEquals(NUM_WRITERS, fs.listStatus(OLDLOGDIR).length); 866 assertFalse(fs.exists(WALDIR)); 867 } catch (IOException e) { 868 fail("There shouldn't be any exception but: " + e.toString()); 869 } 870 } 871 872 // Test for HBASE-3412 873 @Test 874 public void testMovedWALDuringRecovery() throws Exception { 875 // This partial mock will throw LEE for every file simulating 876 // files that were moved 877 FileSystem spiedFs = Mockito.spy(fs); 878 // The "File does not exist" part is very important, 879 // that's how it comes out of HDFS 880 Mockito.doThrow(new LeaseExpiredException("Injected: File does not exist")). 881 when(spiedFs).append(Mockito.<Path>any()); 882 retryOverHdfsProblem(spiedFs); 883 } 884 885 @Test 886 public void testRetryOpenDuringRecovery() throws Exception { 887 FileSystem spiedFs = Mockito.spy(fs); 888 // The "Cannot obtain block length", "Could not obtain the last block", 889 // and "Blocklist for [^ ]* has changed.*" part is very important, 890 // that's how it comes out of HDFS. If HDFS changes the exception 891 // message, this test needs to be adjusted accordingly. 892 // 893 // When DFSClient tries to open a file, HDFS needs to locate 894 // the last block of the file and get its length. However, if the 895 // last block is under recovery, HDFS may have problem to obtain 896 // the block length, in which case, retry may help. 897 Mockito.doAnswer(new Answer<FSDataInputStream>() { 898 private final String[] errors = new String[] { 899 "Cannot obtain block length", "Could not obtain the last block", 900 "Blocklist for " + OLDLOGDIR + " has changed"}; 901 private int count = 0; 902 903 @Override 904 public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable { 905 if (count < 3) { 906 throw new IOException(errors[count++]); 907 } 908 return (FSDataInputStream)invocation.callRealMethod(); 909 } 910 }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt()); 911 retryOverHdfsProblem(spiedFs); 912 } 913 914 @Test 915 public void testTerminationAskedByReporter() throws IOException, CorruptedLogFileException { 916 generateWALs(1, 10, -1); 917 FileStatus logfile = fs.listStatus(WALDIR)[0]; 918 useDifferentDFSClient(); 919 920 final AtomicInteger count = new AtomicInteger(); 921 922 CancelableProgressable localReporter 923 = new CancelableProgressable() { 924 @Override 925 public boolean progress() { 926 count.getAndIncrement(); 927 return false; 928 } 929 }; 930 931 FileSystem spiedFs = Mockito.spy(fs); 932 Mockito.doAnswer(new Answer<FSDataInputStream>() { 933 @Override 934 public FSDataInputStream answer(InvocationOnMock invocation) throws Throwable { 935 Thread.sleep(1500); // Sleep a while and wait report status invoked 936 return (FSDataInputStream)invocation.callRealMethod(); 937 } 938 }).when(spiedFs).open(Mockito.<Path>any(), Mockito.anyInt()); 939 940 try { 941 conf.setInt("hbase.splitlog.report.period", 1000); 942 boolean ret = WALSplitter.splitLogFile( 943 HBASEDIR, logfile, spiedFs, conf, localReporter, null, null, wals); 944 assertFalse("Log splitting should failed", ret); 945 assertTrue(count.get() > 0); 946 } catch (IOException e) { 947 fail("There shouldn't be any exception but: " + e.toString()); 948 } finally { 949 // reset it back to its default value 950 conf.setInt("hbase.splitlog.report.period", 59000); 951 } 952 } 953 954 /** 955 * Test log split process with fake data and lots of edits to trigger threading 956 * issues. 957 */ 958 @Test 959 public void testThreading() throws Exception { 960 doTestThreading(20000, 128*1024*1024, 0); 961 } 962 963 /** 964 * Test blocking behavior of the log split process if writers are writing slower 965 * than the reader is reading. 966 */ 967 @Test 968 public void testThreadingSlowWriterSmallBuffer() throws Exception { 969 doTestThreading(200, 1024, 50); 970 } 971 972 /** 973 * Sets up a log splitter with a mock reader and writer. The mock reader generates 974 * a specified number of edits spread across 5 regions. The mock writer optionally 975 * sleeps for each edit it is fed. 976 * * 977 * After the split is complete, verifies that the statistics show the correct number 978 * of edits output into each region. 979 * 980 * @param numFakeEdits number of fake edits to push through pipeline 981 * @param bufferSize size of in-memory buffer 982 * @param writerSlowness writer threads will sleep this many ms per edit 983 */ 984 private void doTestThreading(final int numFakeEdits, 985 final int bufferSize, 986 final int writerSlowness) throws Exception { 987 988 Configuration localConf = new Configuration(conf); 989 localConf.setInt("hbase.regionserver.hlog.splitlog.buffersize", bufferSize); 990 991 // Create a fake log file (we'll override the reader to produce a stream of edits) 992 Path logPath = new Path(WALDIR, WAL_FILE_PREFIX + ".fake"); 993 FSDataOutputStream out = fs.create(logPath); 994 out.close(); 995 996 // Make region dirs for our destination regions so the output doesn't get skipped 997 final List<String> regions = ImmutableList.of("r0", "r1", "r2", "r3", "r4"); 998 makeRegionDirs(regions); 999 1000 // Create a splitter that reads and writes the data without touching disk 1001 WALSplitter logSplitter = new WALSplitter(wals, localConf, HBASEDIR, fs, null, null) { 1002 1003 /* Produce a mock writer that doesn't write anywhere */ 1004 @Override 1005 protected Writer createWriter(Path logfile) throws IOException { 1006 Writer mockWriter = Mockito.mock(Writer.class); 1007 Mockito.doAnswer(new Answer<Void>() { 1008 int expectedIndex = 0; 1009 1010 @Override 1011 public Void answer(InvocationOnMock invocation) { 1012 if (writerSlowness > 0) { 1013 try { 1014 Thread.sleep(writerSlowness); 1015 } catch (InterruptedException ie) { 1016 Thread.currentThread().interrupt(); 1017 } 1018 } 1019 Entry entry = (Entry) invocation.getArgument(0); 1020 WALEdit edit = entry.getEdit(); 1021 List<Cell> cells = edit.getCells(); 1022 assertEquals(1, cells.size()); 1023 Cell cell = cells.get(0); 1024 1025 // Check that the edits come in the right order. 1026 assertEquals(expectedIndex, Bytes.toInt(cell.getRowArray(), cell.getRowOffset(), 1027 cell.getRowLength())); 1028 expectedIndex++; 1029 return null; 1030 } 1031 }).when(mockWriter).append(Mockito.<Entry>any()); 1032 return mockWriter; 1033 } 1034 1035 /* Produce a mock reader that generates fake entries */ 1036 @Override 1037 protected Reader getReader(Path curLogFile, CancelableProgressable reporter) 1038 throws IOException { 1039 Reader mockReader = Mockito.mock(Reader.class); 1040 Mockito.doAnswer(new Answer<Entry>() { 1041 int index = 0; 1042 1043 @Override 1044 public Entry answer(InvocationOnMock invocation) throws Throwable { 1045 if (index >= numFakeEdits) return null; 1046 1047 // Generate r0 through r4 in round robin fashion 1048 int regionIdx = index % regions.size(); 1049 byte region[] = new byte[] {(byte)'r', (byte) (0x30 + regionIdx)}; 1050 1051 Entry ret = createTestEntry(TABLE_NAME, region, 1052 Bytes.toBytes(index / regions.size()), 1053 FAMILY, QUALIFIER, VALUE, index); 1054 index++; 1055 return ret; 1056 } 1057 }).when(mockReader).next(); 1058 return mockReader; 1059 } 1060 }; 1061 1062 logSplitter.splitLogFile(fs.getFileStatus(logPath), null); 1063 1064 // Verify number of written edits per region 1065 Map<byte[], Long> outputCounts = logSplitter.outputSink.getOutputCounts(); 1066 for (Map.Entry<byte[], Long> entry : outputCounts.entrySet()) { 1067 LOG.info("Got " + entry.getValue() + " output edits for region " + 1068 Bytes.toString(entry.getKey())); 1069 assertEquals((long)entry.getValue(), numFakeEdits / regions.size()); 1070 } 1071 assertEquals("Should have as many outputs as regions", regions.size(), outputCounts.size()); 1072 } 1073 1074 // Does leaving the writer open in testSplitDeletedRegion matter enough for two tests? 1075 @Test 1076 public void testSplitLogFileDeletedRegionDir() throws IOException { 1077 LOG.info("testSplitLogFileDeletedRegionDir"); 1078 final String REGION = "region__1"; 1079 REGIONS.clear(); 1080 REGIONS.add(REGION); 1081 1082 generateWALs(1, 10, -1); 1083 useDifferentDFSClient(); 1084 1085 Path regiondir = new Path(TABLEDIR, REGION); 1086 LOG.info("Region directory is" + regiondir); 1087 fs.delete(regiondir, true); 1088 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 1089 assertFalse(fs.exists(regiondir)); 1090 } 1091 1092 @Test 1093 public void testSplitLogFileEmpty() throws IOException { 1094 LOG.info("testSplitLogFileEmpty"); 1095 // we won't create the hlog dir until getWAL got called, so 1096 // make dir here when testing empty log file 1097 fs.mkdirs(WALDIR); 1098 injectEmptyFile(".empty", true); 1099 useDifferentDFSClient(); 1100 1101 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 1102 Path tdir = FSUtils.getTableDir(HBASEDIR, TABLE_NAME); 1103 assertFalse(fs.exists(tdir)); 1104 1105 assertEquals(0, countWAL(fs.listStatus(OLDLOGDIR)[0].getPath())); 1106 } 1107 1108 @Test 1109 public void testSplitLogFileMultipleRegions() throws IOException { 1110 LOG.info("testSplitLogFileMultipleRegions"); 1111 generateWALs(1, 10, -1); 1112 splitAndCount(1, 10); 1113 } 1114 1115 @Test 1116 public void testSplitLogFileFirstLineCorruptionLog() 1117 throws IOException { 1118 conf.setBoolean(HBASE_SKIP_ERRORS, true); 1119 generateWALs(1, 10, -1); 1120 FileStatus logfile = fs.listStatus(WALDIR)[0]; 1121 1122 corruptWAL(logfile.getPath(), 1123 Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true); 1124 1125 useDifferentDFSClient(); 1126 WALSplitter.split(HBASELOGDIR, WALDIR, OLDLOGDIR, fs, conf, wals); 1127 1128 final Path corruptDir = new Path(FSUtils.getWALRootDir(conf), HConstants.CORRUPT_DIR_NAME); 1129 assertEquals(1, fs.listStatus(corruptDir).length); 1130 } 1131 1132 /** 1133 * {@see https://issues.apache.org/jira/browse/HBASE-4862} 1134 */ 1135 @Test 1136 public void testConcurrentSplitLogAndReplayRecoverEdit() throws IOException { 1137 LOG.info("testConcurrentSplitLogAndReplayRecoverEdit"); 1138 // Generate wals for our destination region 1139 String regionName = "r0"; 1140 final Path regiondir = new Path(TABLEDIR, regionName); 1141 REGIONS.clear(); 1142 REGIONS.add(regionName); 1143 generateWALs(-1); 1144 1145 wals.getWAL(null); 1146 FileStatus[] logfiles = fs.listStatus(WALDIR); 1147 assertTrue("There should be some log file", 1148 logfiles != null && logfiles.length > 0); 1149 1150 WALSplitter logSplitter = new WALSplitter(wals, conf, HBASEDIR, fs, null, null) { 1151 @Override 1152 protected Writer createWriter(Path logfile) 1153 throws IOException { 1154 Writer writer = wals.createRecoveredEditsWriter(this.walFS, logfile); 1155 // After creating writer, simulate region's 1156 // replayRecoveredEditsIfAny() which gets SplitEditFiles of this 1157 // region and delete them, excluding files with '.temp' suffix. 1158 NavigableSet<Path> files = WALSplitter.getSplitEditFilesSorted(fs, regiondir); 1159 if (files != null && !files.isEmpty()) { 1160 for (Path file : files) { 1161 if (!this.walFS.delete(file, false)) { 1162 LOG.error("Failed delete of " + file); 1163 } else { 1164 LOG.debug("Deleted recovered.edits file=" + file); 1165 } 1166 } 1167 } 1168 return writer; 1169 } 1170 }; 1171 try{ 1172 logSplitter.splitLogFile(logfiles[0], null); 1173 } catch (IOException e) { 1174 LOG.info(e.toString(), e); 1175 fail("Throws IOException when spliting " 1176 + "log, it is most likely because writing file does not " 1177 + "exist which is caused by concurrent replayRecoveredEditsIfAny()"); 1178 } 1179 if (fs.exists(CORRUPTDIR)) { 1180 if (fs.listStatus(CORRUPTDIR).length > 0) { 1181 fail("There are some corrupt logs, " 1182 + "it is most likely caused by concurrent replayRecoveredEditsIfAny()"); 1183 } 1184 } 1185 } 1186 1187 private Writer generateWALs(int leaveOpen) throws IOException { 1188 return generateWALs(NUM_WRITERS, ENTRIES, leaveOpen, 0); 1189 } 1190 1191 private Writer generateWALs(int writers, int entries, int leaveOpen) throws IOException { 1192 return generateWALs(writers, entries, leaveOpen, 7); 1193 } 1194 1195 private void makeRegionDirs(List<String> regions) throws IOException { 1196 for (String region : regions) { 1197 LOG.debug("Creating dir for region " + region); 1198 fs.mkdirs(new Path(TABLEDIR, region)); 1199 } 1200 } 1201 1202 /** 1203 * @param leaveOpen index to leave un-closed. -1 to close all. 1204 * @return the writer that's still open, or null if all were closed. 1205 */ 1206 private Writer generateWALs(int writers, int entries, int leaveOpen, int regionEvents) throws IOException { 1207 makeRegionDirs(REGIONS); 1208 fs.mkdirs(WALDIR); 1209 Writer [] ws = new Writer[writers]; 1210 int seq = 0; 1211 int numRegionEventsAdded = 0; 1212 for (int i = 0; i < writers; i++) { 1213 ws[i] = wals.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + i)); 1214 for (int j = 0; j < entries; j++) { 1215 int prefix = 0; 1216 for (String region : REGIONS) { 1217 String row_key = region + prefix++ + i + j; 1218 appendEntry(ws[i], TABLE_NAME, Bytes.toBytes(region), Bytes.toBytes(row_key), FAMILY, 1219 QUALIFIER, VALUE, seq++); 1220 1221 if (numRegionEventsAdded < regionEvents) { 1222 numRegionEventsAdded ++; 1223 appendRegionEvent(ws[i], region); 1224 } 1225 } 1226 } 1227 if (i != leaveOpen) { 1228 ws[i].close(); 1229 LOG.info("Closing writer " + i); 1230 } 1231 } 1232 if (leaveOpen < 0 || leaveOpen >= writers) { 1233 return null; 1234 } 1235 return ws[leaveOpen]; 1236 } 1237 1238 1239 1240 private Path[] getLogForRegion(TableName table, String region) 1241 throws IOException { 1242 Path tdir = FSUtils.getWALTableDir(conf, table); 1243 @SuppressWarnings("deprecation") 1244 Path editsdir = WALSplitter.getRegionDirRecoveredEditsDir(HRegion.getRegionDir(tdir, 1245 Bytes.toString(Bytes.toBytes(region)))); 1246 FileStatus[] files = fs.listStatus(editsdir, new PathFilter() { 1247 @Override 1248 public boolean accept(Path p) { 1249 if (WALSplitter.isSequenceIdFile(p)) { 1250 return false; 1251 } 1252 return true; 1253 } 1254 }); 1255 Path[] paths = new Path[files.length]; 1256 for (int i = 0; i < files.length; i++) { 1257 paths[i] = files[i].getPath(); 1258 } 1259 return paths; 1260 } 1261 1262 private void corruptWAL(Path path, Corruptions corruption, boolean close) throws IOException { 1263 FSDataOutputStream out; 1264 int fileSize = (int) fs.listStatus(path)[0].getLen(); 1265 1266 FSDataInputStream in = fs.open(path); 1267 byte[] corrupted_bytes = new byte[fileSize]; 1268 in.readFully(0, corrupted_bytes, 0, fileSize); 1269 in.close(); 1270 1271 switch (corruption) { 1272 case APPEND_GARBAGE: 1273 fs.delete(path, false); 1274 out = fs.create(path); 1275 out.write(corrupted_bytes); 1276 out.write(Bytes.toBytes("-----")); 1277 closeOrFlush(close, out); 1278 break; 1279 1280 case INSERT_GARBAGE_ON_FIRST_LINE: 1281 fs.delete(path, false); 1282 out = fs.create(path); 1283 out.write(0); 1284 out.write(corrupted_bytes); 1285 closeOrFlush(close, out); 1286 break; 1287 1288 case INSERT_GARBAGE_IN_THE_MIDDLE: 1289 fs.delete(path, false); 1290 out = fs.create(path); 1291 int middle = (int) Math.floor(corrupted_bytes.length / 2); 1292 out.write(corrupted_bytes, 0, middle); 1293 out.write(0); 1294 out.write(corrupted_bytes, middle, corrupted_bytes.length - middle); 1295 closeOrFlush(close, out); 1296 break; 1297 1298 case TRUNCATE: 1299 fs.delete(path, false); 1300 out = fs.create(path); 1301 out.write(corrupted_bytes, 0, fileSize 1302 - (32 + ProtobufLogReader.PB_WAL_COMPLETE_MAGIC.length + Bytes.SIZEOF_INT)); 1303 closeOrFlush(close, out); 1304 break; 1305 1306 case TRUNCATE_TRAILER: 1307 fs.delete(path, false); 1308 out = fs.create(path); 1309 out.write(corrupted_bytes, 0, fileSize - Bytes.SIZEOF_INT);// trailer is truncated. 1310 closeOrFlush(close, out); 1311 break; 1312 } 1313 } 1314 1315 private void closeOrFlush(boolean close, FSDataOutputStream out) 1316 throws IOException { 1317 if (close) { 1318 out.close(); 1319 } else { 1320 Method syncMethod = null; 1321 try { 1322 syncMethod = out.getClass().getMethod("hflush", new Class<?> []{}); 1323 } catch (NoSuchMethodException e) { 1324 try { 1325 syncMethod = out.getClass().getMethod("sync", new Class<?> []{}); 1326 } catch (NoSuchMethodException ex) { 1327 throw new IOException("This version of Hadoop supports " + 1328 "neither Syncable.sync() nor Syncable.hflush()."); 1329 } 1330 } 1331 try { 1332 syncMethod.invoke(out, new Object[]{}); 1333 } catch (Exception e) { 1334 throw new IOException(e); 1335 } 1336 // Not in 0out.hflush(); 1337 } 1338 } 1339 1340 private int countWAL(Path log) throws IOException { 1341 int count = 0; 1342 Reader in = wals.createReader(fs, log); 1343 while (in.next() != null) { 1344 count++; 1345 } 1346 in.close(); 1347 return count; 1348 } 1349 1350 private static void appendCompactionEvent(Writer w, RegionInfo hri, String[] inputs, 1351 String output) throws IOException { 1352 WALProtos.CompactionDescriptor.Builder desc = WALProtos.CompactionDescriptor.newBuilder(); 1353 desc.setTableName(ByteString.copyFrom(hri.getTable().toBytes())) 1354 .setEncodedRegionName(ByteString.copyFrom(hri.getEncodedNameAsBytes())) 1355 .setRegionName(ByteString.copyFrom(hri.getRegionName())) 1356 .setFamilyName(ByteString.copyFrom(FAMILY)) 1357 .setStoreHomeDir(hri.getEncodedName() + "/" + Bytes.toString(FAMILY)) 1358 .addAllCompactionInput(Arrays.asList(inputs)) 1359 .addCompactionOutput(output); 1360 1361 WALEdit edit = WALEdit.createCompaction(hri, desc.build()); 1362 WALKeyImpl key = new WALKeyImpl(hri.getEncodedNameAsBytes(), TABLE_NAME, 1, 1363 EnvironmentEdgeManager.currentTime(), HConstants.DEFAULT_CLUSTER_ID); 1364 w.append(new Entry(key, edit)); 1365 w.sync(false); 1366 } 1367 1368 private static void appendRegionEvent(Writer w, String region) throws IOException { 1369 WALProtos.RegionEventDescriptor regionOpenDesc = ProtobufUtil.toRegionEventDescriptor( 1370 WALProtos.RegionEventDescriptor.EventType.REGION_OPEN, 1371 TABLE_NAME.toBytes(), 1372 Bytes.toBytes(region), 1373 Bytes.toBytes(String.valueOf(region.hashCode())), 1374 1, 1375 ServerName.parseServerName("ServerName:9099"), ImmutableMap.<byte[], List<Path>>of()); 1376 final long time = EnvironmentEdgeManager.currentTime(); 1377 KeyValue kv = new KeyValue(Bytes.toBytes(region), WALEdit.METAFAMILY, WALEdit.REGION_EVENT, 1378 time, regionOpenDesc.toByteArray()); 1379 final WALKeyImpl walKey = new WALKeyImpl(Bytes.toBytes(region), TABLE_NAME, 1, time, 1380 HConstants.DEFAULT_CLUSTER_ID); 1381 w.append( 1382 new Entry(walKey, new WALEdit().add(kv))); 1383 w.sync(false); 1384 } 1385 1386 public static long appendEntry(Writer writer, TableName table, byte[] region, 1387 byte[] row, byte[] family, byte[] qualifier, 1388 byte[] value, long seq) 1389 throws IOException { 1390 LOG.info(Thread.currentThread().getName() + " append"); 1391 writer.append(createTestEntry(table, region, row, family, qualifier, value, seq)); 1392 LOG.info(Thread.currentThread().getName() + " sync"); 1393 writer.sync(false); 1394 return seq; 1395 } 1396 1397 private static Entry createTestEntry( 1398 TableName table, byte[] region, 1399 byte[] row, byte[] family, byte[] qualifier, 1400 byte[] value, long seq) { 1401 long time = System.nanoTime(); 1402 1403 seq++; 1404 final KeyValue cell = new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value); 1405 WALEdit edit = new WALEdit(); 1406 edit.add(cell); 1407 return new Entry(new WALKeyImpl(region, table, seq, time, 1408 HConstants.DEFAULT_CLUSTER_ID), edit); 1409 } 1410 1411 private void injectEmptyFile(String suffix, boolean closeFile) throws IOException { 1412 Writer writer = 1413 WALFactory.createWALWriter(fs, new Path(WALDIR, WAL_FILE_PREFIX + suffix), conf); 1414 if (closeFile) { 1415 writer.close(); 1416 } 1417 } 1418 1419 private boolean logsAreEqual(Path p1, Path p2) throws IOException { 1420 Reader in1, in2; 1421 in1 = wals.createReader(fs, p1); 1422 in2 = wals.createReader(fs, p2); 1423 Entry entry1; 1424 Entry entry2; 1425 while ((entry1 = in1.next()) != null) { 1426 entry2 = in2.next(); 1427 if ((entry1.getKey().compareTo(entry2.getKey()) != 0) || 1428 (!entry1.getEdit().toString().equals(entry2.getEdit().toString()))) { 1429 return false; 1430 } 1431 } 1432 in1.close(); 1433 in2.close(); 1434 return true; 1435 } 1436}