001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.Collection;
025import java.util.List;
026import java.util.concurrent.CountDownLatch;
027import java.util.concurrent.atomic.AtomicInteger;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.hbase.client.Admin;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
033import org.apache.hadoop.hbase.client.RegionInfo;
034import org.apache.hadoop.hbase.client.Table;
035import org.apache.hadoop.hbase.client.TableDescriptor;
036import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
037import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
038import org.apache.hadoop.hbase.regionserver.HRegion;
039import org.apache.hadoop.hbase.regionserver.HRegionServer;
040import org.apache.hadoop.hbase.regionserver.HStore;
041import org.apache.hadoop.hbase.regionserver.HStoreFile;
042import org.apache.hadoop.hbase.regionserver.Region;
043import org.apache.hadoop.hbase.regionserver.RegionServerServices;
044import org.apache.hadoop.hbase.regionserver.Store;
045import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
046import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
047import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
048import org.apache.hadoop.hbase.security.User;
049import org.apache.hadoop.hbase.testclassification.LargeTests;
050import org.apache.hadoop.hbase.testclassification.MiscTests;
051import org.apache.hadoop.hbase.util.Bytes;
052import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
053import org.apache.hadoop.hbase.wal.WAL;
054import org.junit.ClassRule;
055import org.junit.Test;
056import org.junit.experimental.categories.Category;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
061
062import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
064
065/**
 * Test for the case where a regionserver going down has enough cycles to do damage to regions that
 * have actually been assigned elsewhere.
068 * <p>
 * If we happen to assign a region before it is fully done in its old location -- i.e. it is on
 * two servers at the same time -- all can work fine until the case where the region on the dying
 * server decides to compact or otherwise change the region file set. The region in its new location
 * will then get a surprise when it tries to do something w/ a file removed by the region in its old
 * location on the dying server.
074 * <p>
075 * Making a test for this case is a little tough in that even if a file is deleted up on the
076 * namenode, if the file was opened before the delete, it will continue to let reads happen until
077 * something changes the state of cached blocks in the dfsclient that was already open (a block from
078 * the deleted file is cleaned from the datanode by NN).
079 * <p>
 * What we will do below is do an explicit check for existence on the files listed in the region
 * that has had some files removed because of a compaction. This sort of hurries things along and
 * makes certain what is otherwise a chance occurrence.
083 */
084@Category({MiscTests.class, LargeTests.class})
085public class TestIOFencing {
086
087  @ClassRule
088  public static final HBaseClassTestRule CLASS_RULE =
089      HBaseClassTestRule.forClass(TestIOFencing.class);
090
091  private static final Logger LOG = LoggerFactory.getLogger(TestIOFencing.class);
092  static {
093    // Uncomment the following lines if more verbosity is needed for
094    // debugging (see HBASE-12285 for details).
095    //((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
096    //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
097    //((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
098    //((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
099    //    .getLogger().setLevel(Level.ALL);
100    //((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
101  }
102
103  public abstract static class CompactionBlockerRegion extends HRegion {
104    AtomicInteger compactCount = new AtomicInteger();
105    volatile CountDownLatch compactionsBlocked = new CountDownLatch(0);
106    volatile CountDownLatch compactionsWaiting = new CountDownLatch(0);
107
108    @SuppressWarnings("deprecation")
109    public CompactionBlockerRegion(Path tableDir, WAL log,
110        FileSystem fs, Configuration confParam, RegionInfo info,
111        TableDescriptor htd, RegionServerServices rsServices) {
112      super(tableDir, log, fs, confParam, info, htd, rsServices);
113    }
114
115    public void stopCompactions() {
116      compactionsBlocked = new CountDownLatch(1);
117      compactionsWaiting = new CountDownLatch(1);
118    }
119
120    public void allowCompactions() {
121      LOG.debug("allowing compactions");
122      compactionsBlocked.countDown();
123    }
124    public void waitForCompactionToBlock() throws IOException {
125      try {
126        LOG.debug("waiting for compaction to block");
127        compactionsWaiting.await();
128        LOG.debug("compaction block reached");
129      } catch (InterruptedException ex) {
130        throw new IOException(ex);
131      }
132    }
133
134    @Override
135    public boolean compact(CompactionContext compaction, HStore store,
136        ThroughputController throughputController) throws IOException {
137      try {
138        return super.compact(compaction, store, throughputController);
139      } finally {
140        compactCount.getAndIncrement();
141      }
142    }
143
144    @Override
145    public boolean compact(CompactionContext compaction, HStore store,
146        ThroughputController throughputController, User user) throws IOException {
147      try {
148        return super.compact(compaction, store, throughputController, user);
149      } finally {
150        compactCount.getAndIncrement();
151      }
152    }
153
154    public int countStoreFiles() {
155      int count = 0;
156      for (HStore store : stores.values()) {
157        count += store.getStorefilesCount();
158      }
159      return count;
160    }
161  }
162
163  /**
164   * An override of HRegion that allows us park compactions in a holding pattern and
165   * then when appropriate for the test, allow them proceed again.
166   */
167  public static class BlockCompactionsInPrepRegion extends CompactionBlockerRegion {
168
169    public BlockCompactionsInPrepRegion(Path tableDir, WAL log,
170        FileSystem fs, Configuration confParam, RegionInfo info,
171        TableDescriptor htd, RegionServerServices rsServices) {
172      super(tableDir, log, fs, confParam, info, htd, rsServices);
173    }
174    @Override
175    protected void doRegionCompactionPrep() throws IOException {
176      compactionsWaiting.countDown();
177      try {
178        compactionsBlocked.await();
179      } catch (InterruptedException ex) {
180        throw new IOException();
181      }
182      super.doRegionCompactionPrep();
183    }
184  }
185
186  /**
187   * An override of HRegion that allows us park compactions in a holding pattern and
188   * then when appropriate for the test, allow them proceed again. This allows the compaction
189   * entry to go the WAL before blocking, but blocks afterwards
190   */
191  public static class BlockCompactionsInCompletionRegion extends CompactionBlockerRegion {
192    public BlockCompactionsInCompletionRegion(Path tableDir, WAL log,
193        FileSystem fs, Configuration confParam, RegionInfo info,
194        TableDescriptor htd, RegionServerServices rsServices) {
195      super(tableDir, log, fs, confParam, info, htd, rsServices);
196    }
197    @Override
198    protected HStore instantiateHStore(final ColumnFamilyDescriptor family) throws IOException {
199      return new BlockCompactionsInCompletionHStore(this, family, this.conf);
200    }
201  }
202
203  public static class BlockCompactionsInCompletionHStore extends HStore {
204    CompactionBlockerRegion r;
205    protected BlockCompactionsInCompletionHStore(HRegion region, ColumnFamilyDescriptor family,
206        Configuration confParam) throws IOException {
207      super(region, family, confParam);
208      r = (CompactionBlockerRegion) region;
209    }
210
211    @Override
212    protected void completeCompaction(Collection<HStoreFile> compactedFiles) throws IOException {
213      try {
214        r.compactionsWaiting.countDown();
215        r.compactionsBlocked.await();
216      } catch (InterruptedException ex) {
217        throw new IOException(ex);
218      }
219      super.completeCompaction(compactedFiles);
220    }
221  }
222
223  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
224  private final static TableName TABLE_NAME =
225      TableName.valueOf("tabletest");
226  private final static byte[] FAMILY = Bytes.toBytes("family");
227  private static final int FIRST_BATCH_COUNT = 4000;
228  private static final int SECOND_BATCH_COUNT = FIRST_BATCH_COUNT;
229
230  /**
231   * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
232   * compaction until after we have killed the server and the region has come up on
233   * a new regionserver altogether.  This fakes the double assignment case where region in one
234   * location changes the files out from underneath a region being served elsewhere.
235   */
236  @Test
237  public void testFencingAroundCompaction() throws Exception {
238    for(MemoryCompactionPolicy policy : MemoryCompactionPolicy.values()) {
239      doTest(BlockCompactionsInPrepRegion.class, policy);
240    }
241  }
242
243  /**
244   * Test that puts up a regionserver, starts a compaction on a loaded region but holds the
245   * compaction completion until after we have killed the server and the region has come up on
246   * a new regionserver altogether.  This fakes the double assignment case where region in one
247   * location changes the files out from underneath a region being served elsewhere.
248   */
249  @Test
250  public void testFencingAroundCompactionAfterWALSync() throws Exception {
251    for(MemoryCompactionPolicy policy : MemoryCompactionPolicy.values()) {
252      doTest(BlockCompactionsInCompletionRegion.class, policy);
253    }
254  }
255
256  public void doTest(Class<?> regionClass, MemoryCompactionPolicy policy) throws Exception {
257    Configuration c = TEST_UTIL.getConfiguration();
258    // Insert our custom region
259    c.setClass(HConstants.REGION_IMPL, regionClass, HRegion.class);
260    // Encourage plenty of flushes
261    c.setLong("hbase.hregion.memstore.flush.size", 25000);
262    c.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName());
263    // Only run compaction when we tell it to
264    c.setInt("hbase.hstore.compaction.min",1);
265    c.setInt("hbase.hstore.compactionThreshold", 1000);
266    c.setLong("hbase.hstore.blockingStoreFiles", 1000);
267    // Compact quickly after we tell it to!
268    c.setInt("hbase.regionserver.thread.splitcompactcheckfrequency", 1000);
269    c.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(policy));
270    LOG.info("Starting mini cluster");
271    TEST_UTIL.startMiniCluster(1);
272    CompactionBlockerRegion compactingRegion = null;
273    Admin admin = null;
274    try {
275      LOG.info("Creating admin");
276      admin = TEST_UTIL.getConnection().getAdmin();
277      LOG.info("Creating table");
278      TEST_UTIL.createTable(TABLE_NAME, FAMILY);
279      Table table = TEST_UTIL.getConnection().getTable(TABLE_NAME);
280      LOG.info("Loading test table");
281      // Find the region
282      List<HRegion> testRegions = TEST_UTIL.getMiniHBaseCluster().findRegionsForTable(TABLE_NAME);
283      assertEquals(1, testRegions.size());
284      compactingRegion = (CompactionBlockerRegion)testRegions.get(0);
285      LOG.info("Blocking compactions");
286      compactingRegion.stopCompactions();
287      long lastFlushTime = compactingRegion.getEarliestFlushTimeForAllStores();
288      // Load some rows
289      TEST_UTIL.loadNumericRows(table, FAMILY, 0, FIRST_BATCH_COUNT);
290
291      // add a compaction from an older (non-existing) region to see whether we successfully skip
292      // those entries
293      HRegionInfo oldHri = new HRegionInfo(table.getName(),
294        HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
295      CompactionDescriptor compactionDescriptor = ProtobufUtil.toCompactionDescriptor(oldHri,
296        FAMILY, Lists.newArrayList(new Path("/a")), Lists.newArrayList(new Path("/b")),
297        new Path("store_dir"));
298      WALUtil.writeCompactionMarker(compactingRegion.getWAL(),
299          ((HRegion)compactingRegion).getReplicationScope(),
300        oldHri, compactionDescriptor, compactingRegion.getMVCC());
301
302      // Wait till flush has happened, otherwise there won't be multiple store files
303      long startWaitTime = System.currentTimeMillis();
304      while (compactingRegion.getEarliestFlushTimeForAllStores() <= lastFlushTime ||
305          compactingRegion.countStoreFiles() <= 1) {
306        LOG.info("Waiting for the region to flush " +
307          compactingRegion.getRegionInfo().getRegionNameAsString());
308        Thread.sleep(1000);
309        admin.flush(table.getName());
310        assertTrue("Timed out waiting for the region to flush",
311          System.currentTimeMillis() - startWaitTime < 30000);
312      }
313      assertTrue(compactingRegion.countStoreFiles() > 1);
314      final byte REGION_NAME[] = compactingRegion.getRegionInfo().getRegionName();
315      LOG.info("Asking for compaction");
316      admin.majorCompact(TABLE_NAME);
317      LOG.info("Waiting for compaction to be about to start");
318      compactingRegion.waitForCompactionToBlock();
319      LOG.info("Starting a new server");
320      RegionServerThread newServerThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
321      final HRegionServer newServer = newServerThread.getRegionServer();
322      LOG.info("Killing region server ZK lease");
323      TEST_UTIL.expireRegionServerSession(0);
324      CompactionBlockerRegion newRegion = null;
325      startWaitTime = System.currentTimeMillis();
326      LOG.info("Waiting for the new server to pick up the region " + Bytes.toString(REGION_NAME));
327
328      // wait for region to be assigned and to go out of log replay if applicable
329      Waiter.waitFor(c, 60000, new Waiter.Predicate<Exception>() {
330        @Override
331        public boolean evaluate() throws Exception {
332          Region newRegion = newServer.getOnlineRegion(REGION_NAME);
333          return newRegion != null;
334        }
335      });
336
337      newRegion = (CompactionBlockerRegion)newServer.getOnlineRegion(REGION_NAME);
338
339      // After compaction of old region finishes on the server that was going down, make sure that
340      // all the files we expect are still working when region is up in new location.
341      FileSystem fs = newRegion.getFilesystem();
342      for (String f: newRegion.getStoreFileList(new byte [][] {FAMILY})) {
343        assertTrue("After compaction, does not exist: " + f, fs.exists(new Path(f)));
344      }
345      LOG.info("Allowing compaction to proceed");
346      compactingRegion.allowCompactions();
347      while (compactingRegion.compactCount.get() == 0) {
348        Thread.sleep(1000);
349      }
350      // The server we killed stays up until the compaction that was started before it was killed
351      // completes. In logs you should see the old regionserver now going down.
352      LOG.info("Compaction finished");
353
354      // If we survive the split keep going...
355      // Now we make sure that the region isn't totally confused.  Load up more rows.
356      TEST_UTIL.loadNumericRows(table, FAMILY, FIRST_BATCH_COUNT,
357        FIRST_BATCH_COUNT + SECOND_BATCH_COUNT);
358      admin.majorCompact(TABLE_NAME);
359      startWaitTime = System.currentTimeMillis();
360      while (newRegion.compactCount.get() == 0) {
361        Thread.sleep(1000);
362        assertTrue("New region never compacted",
363          System.currentTimeMillis() - startWaitTime < 180000);
364      }
365      int count;
366      for (int i = 0;; i++) {
367        try {
368          count = TEST_UTIL.countRows(table);
369          break;
370        } catch (DoNotRetryIOException e) {
371          // wait up to 30s
372          if (i >= 30 || !e.getMessage().contains("File does not exist")) {
373            throw e;
374          }
375          Thread.sleep(1000);
376        }
377      }
378      if (policy == MemoryCompactionPolicy.EAGER || policy == MemoryCompactionPolicy.ADAPTIVE) {
379        assertTrue(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT >= count);
380      } else {
381        assertEquals(FIRST_BATCH_COUNT + SECOND_BATCH_COUNT, count);
382      }
383    } finally {
384      if (compactingRegion != null) {
385        compactingRegion.allowCompactions();
386      }
387      admin.close();
388      TEST_UTIL.shutdownMiniCluster();
389    }
390  }
391}