/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.ReplicationTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Replicates one source cluster to two peer clusters and verifies what each peer receives.
 * <p>
 * All three mini clusters share a single mini ZooKeeper cluster and are kept apart by distinct
 * znode parents ("/1", "/2" and "/3"). The second peer is added while data is already flowing
 * to the first one, and the source WAL is rolled in between, so the test can check which
 * edits reach a late-added peer.
 */
@Category({ReplicationTests.class, LargeTests.class})
public class TestMultiSlaveReplication {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestMultiSlaveReplication.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMultiSlaveReplication.class);

  private static Configuration conf1;
  private static Configuration conf2;
  private static Configuration conf3;

  private static HBaseTestingUtility utility1;
  private static HBaseTestingUtility utility2;
  private static HBaseTestingUtility utility3;

  /** Pause, in milliseconds, between two polls of a peer table. */
  private static final long SLEEP_TIME = 500;
  /** Upper bound on the number of loop iterations of the polling helpers. */
  private static final int NB_RETRIES = 100;

  private static final TableName tableName = TableName.valueOf("test");
  private static final byte[] famName = Bytes.toBytes("f");
  private static final byte[] row = Bytes.toBytes("row");
  private static final byte[] row1 = Bytes.toBytes("row1");
  private static final byte[] row2 = Bytes.toBytes("row2");
  private static final byte[] row3 = Bytes.toBytes("row3");
  private static final byte[] noRepfamName = Bytes.toBytes("norep");

  private static HTableDescriptor table;

  /**
   * Builds the three cluster configurations, starts the shared mini ZooKeeper cluster and
   * prepares the descriptor of the test table. The HBase mini clusters themselves are started
   * by the test method.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    conf1 = HBaseConfiguration.create();
    conf1.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
    // smaller block size and capacity to trigger more operations
    // and test them
    conf1.setInt("hbase.regionserver.hlog.blocksize", 1024 * 20);
    conf1.setInt("replication.source.size.capacity", 1024);
    conf1.setLong("replication.source.sleepforretries", 100);
    conf1.setInt("hbase.regionserver.maxlogs", 10);
    conf1.setLong("hbase.master.logcleaner.ttl", 10);
    conf1.setLong(HConstants.THREAD_WAKE_FREQUENCY, 100);
    conf1.setStrings(CoprocessorHost.USER_REGION_COPROCESSOR_CONF_KEY,
        "org.apache.hadoop.hbase.replication.TestMasterReplication$CoprocessorCounter");
    conf1.setInt("hbase.master.cleaner.interval", 5 * 1000);

    utility1 = new HBaseTestingUtility(conf1);
    utility1.startMiniZKCluster();
    MiniZooKeeperCluster miniZK = utility1.getZkCluster();
    utility1.setZkCluster(miniZK);
    // The watcher is only instantiated with the "create base znode" flag set to true;
    // it is not used afterwards, so release its ZooKeeper connection immediately.
    new ZKWatcher(conf1, "cluster1", null, true).close();

    conf2 = new Configuration(conf1);
    conf2.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/2");

    conf3 = new Configuration(conf1);
    conf3.set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/3");

    utility2 = new HBaseTestingUtility(conf2);
    utility2.setZkCluster(miniZK);
    new ZKWatcher(conf2, "cluster2", null, true).close();

    utility3 = new HBaseTestingUtility(conf3);
    utility3.setZkCluster(miniZK);
    new ZKWatcher(conf3, "cluster3", null, true).close();

    table = new HTableDescriptor(tableName);
    // Only the "f" family is given the global replication scope.
    HColumnDescriptor fam = new HColumnDescriptor(famName);
    fam.setScope(HConstants.REPLICATION_SCOPE_GLOBAL);
    table.addFamily(fam);
    fam = new HColumnDescriptor(noRepfamName);
    table.addFamily(fam);
  }

  /**
   * Starts the three mini clusters, runs the replication scenario and always shuts the
   * clusters down again, even when an assertion of the scenario fails.
   */
  @Test
  public void testMultiSlaveReplication() throws Exception {
    LOG.info("testMultiSlaveReplication");
    try {
      utility1.startMiniCluster();
      utility2.startMiniCluster();
      utility3.startMiniCluster();
      runMultiSlaveScenario();
    } finally {
      utility3.shutdownMiniCluster();
      utility2.shutdownMiniCluster();
      utility1.shutdownMiniCluster();
    }
  }

  /**
   * Runs the actual scenario against already started clusters: replicate to cluster 2 only,
   * roll the WAL, add cluster 3 as a second peer and verify what each peer ends up with.
   */
  private void runMultiSlaveScenario() throws Exception {
    try (ReplicationAdmin admin1 = new ReplicationAdmin(conf1)) {
      utility1.getAdmin().createTable(table);
      utility2.getAdmin().createTable(table);
      utility3.getAdmin().createTable(table);

      try (Table htable1 = utility1.getConnection().getTable(tableName);
          Table htable2 = utility2.getConnection().getTable(tableName);
          Table htable3 = utility3.getConnection().getTable(tableName)) {

        ReplicationPeerConfig rpc = new ReplicationPeerConfig();
        rpc.setClusterKey(utility2.getClusterKey());
        admin1.addPeer("1", rpc, null);

        // put "row" and wait 'til it got around, then delete
        putAndWait(row, famName, htable1, htable2);
        deleteAndWait(row, htable1, htable2);
        // check it wasn't replicated to cluster 3
        checkRow(row, 0, htable3);

        putAndWait(row2, famName, htable1, htable2);

        // now roll the region server's logs
        rollWALAndWait(utility1, htable1.getName(), row2);

        // after the log was rolled put a new row
        putAndWait(row3, famName, htable1, htable2);

        rpc = new ReplicationPeerConfig();
        rpc.setClusterKey(utility3.getClusterKey());
        admin1.addPeer("2", rpc, null);

        // put a row, check it was replicated to all clusters
        putAndWait(row1, famName, htable1, htable2, htable3);
        // delete and verify
        deleteAndWait(row1, htable1, htable2, htable3);

        // make sure row2 did not get replicated after
        // cluster 3 was added
        checkRow(row2, 0, htable3);

        // row3 will get replicated, because it was in the
        // latest log
        checkRow(row3, 1, htable3);

        Put p = new Put(row);
        p.addColumn(famName, row, row);
        htable1.put(p);
        // now roll the logs again
        rollWALAndWait(utility1, htable1.getName(), row);

        // cleanup "row2", also conveniently use this to wait for replication
        // to finish
        deleteAndWait(row2, htable1, htable2, htable3);
        // Even if the log was rolled in the middle of the replication
        // "row" is still replicated.
        checkRow(row, 1, htable2);
        // row2 never existed in cluster 3 (peer "2"), so the wait above returns immediately
        // for it while its replication thread may still be sleeping; poll before checking.
        checkWithWait(row, 1, htable3);

        // cleanup the rest
        deleteAndWait(row, htable1, htable2, htable3);
        deleteAndWait(row3, htable1, htable2, htable3);
      }
    }
  }

  /**
   * Requests a WAL roll on the region server hosting {@code row} and blocks until the WAL of
   * that region reports a completed roll.
   *
   * @param utility testing utility of the cluster whose WAL is rolled
   * @param table table containing {@code row}
   * @param row row used to locate the region (and therefore the WAL) to roll
   * @throws IOException if the roll request fails
   */
  private void rollWALAndWait(final HBaseTestingUtility utility, final TableName table,
      final byte[] row) throws IOException {
    final Admin admin = utility.getAdmin();
    final MiniHBaseCluster cluster = utility.getMiniHBaseCluster();

    // find the region that corresponds to the given row.
    HRegion region = null;
    for (HRegion candidate : cluster.getRegions(table)) {
      if (HRegion.rowIsInRange(candidate.getRegionInfo(), row)) {
        region = candidate;
        break;
      }
    }
    assertNotNull("Couldn't find the region for row '" + Arrays.toString(row) + "'", region);

    final CountDownLatch latch = new CountDownLatch(1);

    // listen for successful log rolls
    final WALActionsListener listener = new WALActionsListener() {
      @Override
      public void postLogRoll(final Path oldPath, final Path newPath) throws IOException {
        latch.countDown();
      }
    };
    region.getWAL().registerWALActionsListener(listener);

    try {
      // request a roll
      admin.rollWALWriter(cluster.getServerHoldingRegion(
          region.getTableDescriptor().getTableName(), region.getRegionInfo().getRegionName()));

      // wait
      latch.await();
    } catch (InterruptedException exception) {
      LOG.warn("Interrupted while waiting for the wal of '{}' to roll. If later "
          + "replication tests fail, it's probably because we should still be waiting.", region);
      Thread.currentThread().interrupt();
    } finally {
      // Always drop the listener, also when the roll request itself failed.
      region.getWAL().unregisterWALActionsListener(listener);
    }
  }

  /**
   * Polls {@code table} until {@code row} shows up, then asserts that it has exactly
   * {@code count} cells. Because the assertion only runs once at least one cell is visible,
   * this helper can only succeed for {@code count >= 1}.
   *
   * @param row row to look up
   * @param count expected number of cells once the row is visible
   * @param table table to poll
   */
  private void checkWithWait(byte[] row, int count, Table table) throws Exception {
    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time while getting the row.");
      }
      Result res = table.get(get);
      if (res.size() >= 1) {
        LOG.info("Row is replicated");
        assertEquals("Table '" + table + "' did not have the expected number of results.",
            count, res.size());
        return;
      }
      Thread.sleep(SLEEP_TIME);
    }
  }

  /**
   * Asserts, without waiting, that {@code row} has exactly {@code count} cells in every one of
   * the given tables.
   */
  private void checkRow(byte[] row, int count, Table... tables) throws IOException {
    Get get = new Get(row);
    for (Table table : tables) {
      Result res = table.get(get);
      assertEquals("Table '" + table + "' did not have the expected number of results.",
          count, res.size());
    }
  }

  /**
   * Deletes {@code row} from {@code source} and polls until the row is gone from all
   * {@code targets}, failing the test when the retries are exhausted.
   */
  private void deleteAndWait(byte[] row, Table source, Table... targets)
      throws Exception {
    Delete del = new Delete(row);
    source.delete(del);

    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for del replication");
      }
      boolean removedFromAll = true;
      for (Table target : targets) {
        Result res = target.get(get);
        if (!res.isEmpty()) {
          LOG.info("Row not deleted");
          removedFromAll = false;
          break;
        }
      }
      if (removedFromAll) {
        return;
      }
      Thread.sleep(SLEEP_TIME);
    }
  }

  /**
   * Writes {@code row} (qualifier and value both equal to the row key) into family {@code fam}
   * of {@code source} and polls until the row is visible, with the expected value, in all
   * {@code targets}, failing the test when the retries are exhausted.
   */
  private void putAndWait(byte[] row, byte[] fam, Table source, Table... targets)
      throws Exception {
    Put put = new Put(row);
    put.addColumn(fam, row, row);
    source.put(put);

    Get get = new Get(row);
    for (int i = 0; i < NB_RETRIES; i++) {
      if (i == NB_RETRIES - 1) {
        fail("Waited too much time for put replication");
      }
      boolean replicatedToAll = true;
      for (Table target : targets) {
        Result res = target.get(get);
        if (res.isEmpty()) {
          LOG.info("Row not available");
          replicatedToAll = false;
          break;
        }
        // expected value first, so a failure message reports the two sides correctly
        assertArrayEquals(row, res.value());
      }
      if (replicatedToAll) {
        return;
      }
      Thread.sleep(SLEEP_TIME);
    }
  }

}