001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertTrue; 023import static org.mockito.Matchers.anyObject; 024import static org.mockito.Mockito.doAnswer; 025import static org.mockito.Mockito.doReturn; 026import static org.mockito.Mockito.doThrow; 027import static org.mockito.Mockito.mock; 028import static org.mockito.Mockito.spy; 029 030import java.io.IOException; 031import java.util.Arrays; 032import java.util.Map; 033import org.apache.hadoop.hbase.*; 034import org.apache.hadoop.hbase.HBaseClassTestRule; 035import org.apache.hadoop.hbase.client.Connection; 036import org.apache.hadoop.hbase.client.ConnectionFactory; 037import org.apache.hadoop.hbase.client.Put; 038import org.apache.hadoop.hbase.client.Result; 039import org.apache.hadoop.hbase.client.ResultScanner; 040import org.apache.hadoop.hbase.client.Scan; 041import org.apache.hadoop.hbase.client.Table; 042import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; 043import org.apache.hadoop.hbase.filter.Filter; 044import org.apache.hadoop.hbase.filter.RegexStringComparator; 045import org.apache.hadoop.hbase.filter.RowFilter; 046import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 047import org.apache.hadoop.hbase.testclassification.LargeTests; 048import org.apache.hadoop.hbase.util.Bytes; 049import org.apache.hadoop.io.NullWritable; 050import org.apache.hadoop.mapred.JobConf; 051import org.apache.hadoop.mapred.JobConfigurable; 052import org.apache.hadoop.mapred.MiniMRCluster; 053import org.apache.hadoop.mapreduce.InputFormat; 054import org.apache.hadoop.mapreduce.Job; 055import org.apache.hadoop.mapreduce.JobContext; 056import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; 057import org.junit.AfterClass; 058import org.junit.Before; 059import org.junit.BeforeClass; 060import org.junit.ClassRule; 061import org.junit.Test; 062import org.junit.experimental.categories.Category; 063import org.mockito.invocation.InvocationOnMock; 064import org.mockito.stubbing.Answer; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068/** 069 * This tests the TableInputFormat and its recovery semantics 070 * 071 */ 072@Category(LargeTests.class) 073public class TestTableInputFormat { 074 075 @ClassRule 076 public static final HBaseClassTestRule CLASS_RULE = 077 HBaseClassTestRule.forClass(TestTableInputFormat.class); 078 079 private static final Logger LOG = LoggerFactory.getLogger(TestTableInputFormat.class); 080 081 private final static HBaseTestingUtility UTIL = new HBaseTestingUtility(); 082 private static MiniMRCluster mrCluster; 083 static final byte[] FAMILY = Bytes.toBytes("family"); 084 085 private static final byte[][] columns = new byte[][] { FAMILY }; 086 087 @BeforeClass 088 public static void beforeClass() throws Exception { 089 UTIL.startMiniCluster(); 090 } 091 092 @AfterClass 093 public static void afterClass() throws Exception { 094 UTIL.shutdownMiniCluster(); 095 } 096 097 @Before 098 public void before() throws IOException { 099 LOG.info("before"); 100 UTIL.ensureSomeRegionServersAvailable(1); 101 LOG.info("before done"); 102 } 103 104 /** 105 * Setup a table with two rows and values. 106 * 107 * @param tableName 108 * @return A Table instance for the created table. 109 * @throws IOException 110 */ 111 public static Table createTable(byte[] tableName) throws IOException { 112 return createTable(tableName, new byte[][] { FAMILY }); 113 } 114 115 /** 116 * Setup a table with two rows and values per column family. 117 * 118 * @param tableName 119 * @return A Table instance for the created table. 120 * @throws IOException 121 */ 122 public static Table createTable(byte[] tableName, byte[][] families) throws IOException { 123 Table table = UTIL.createTable(TableName.valueOf(tableName), families); 124 Put p = new Put("aaa".getBytes()); 125 for (byte[] family : families) { 126 p.addColumn(family, null, "value aaa".getBytes()); 127 } 128 table.put(p); 129 p = new Put("bbb".getBytes()); 130 for (byte[] family : families) { 131 p.addColumn(family, null, "value bbb".getBytes()); 132 } 133 table.put(p); 134 return table; 135 } 136 137 /** 138 * Verify that the result and key have expected values. 139 * 140 * @param r 141 * @param key 142 * @param expectedKey 143 * @param expectedValue 144 * @return 145 */ 146 static boolean checkResult(Result r, ImmutableBytesWritable key, 147 byte[] expectedKey, byte[] expectedValue) { 148 assertEquals(0, key.compareTo(expectedKey)); 149 Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY); 150 byte[] value = vals.values().iterator().next(); 151 assertTrue(Arrays.equals(value, expectedValue)); 152 return true; // if succeed 153 } 154 155 /** 156 * Create table data and run tests on specified htable using the 157 * o.a.h.hbase.mapreduce API. 158 * 159 * @param table 160 * @throws IOException 161 * @throws InterruptedException 162 */ 163 static void runTestMapreduce(Table table) throws IOException, 164 InterruptedException { 165 org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr = 166 new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl(); 167 Scan s = new Scan(); 168 s.setStartRow("aaa".getBytes()); 169 s.setStopRow("zzz".getBytes()); 170 s.addFamily(FAMILY); 171 trr.setScan(s); 172 trr.setHTable(table); 173 174 trr.initialize(null, null); 175 Result r = new Result(); 176 ImmutableBytesWritable key = new ImmutableBytesWritable(); 177 178 boolean more = trr.nextKeyValue(); 179 assertTrue(more); 180 key = trr.getCurrentKey(); 181 r = trr.getCurrentValue(); 182 checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes()); 183 184 more = trr.nextKeyValue(); 185 assertTrue(more); 186 key = trr.getCurrentKey(); 187 r = trr.getCurrentValue(); 188 checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes()); 189 190 // no more data 191 more = trr.nextKeyValue(); 192 assertFalse(more); 193 } 194 195 /** 196 * Create a table that IOE's on first scanner next call 197 * 198 * @throws IOException 199 */ 200 static Table createIOEScannerTable(byte[] name, final int failCnt) 201 throws IOException { 202 // build up a mock scanner stuff to fail the first time 203 Answer<ResultScanner> a = new Answer<ResultScanner>() { 204 int cnt = 0; 205 206 @Override 207 public ResultScanner answer(InvocationOnMock invocation) throws Throwable { 208 // first invocation return the busted mock scanner 209 if (cnt++ < failCnt) { 210 // create mock ResultScanner that always fails. 211 Scan scan = mock(Scan.class); 212 doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe 213 ResultScanner scanner = mock(ResultScanner.class); 214 // simulate TimeoutException / IOException 215 doThrow(new IOException("Injected exception")).when(scanner).next(); 216 return scanner; 217 } 218 219 // otherwise return the real scanner. 220 return (ResultScanner) invocation.callRealMethod(); 221 } 222 }; 223 224 Table htable = spy(createTable(name)); 225 doAnswer(a).when(htable).getScanner((Scan) anyObject()); 226 return htable; 227 } 228 229 /** 230 * Create a table that throws a NotServingRegionException on first scanner 231 * next call 232 * 233 * @throws IOException 234 */ 235 static Table createDNRIOEScannerTable(byte[] name, final int failCnt) 236 throws IOException { 237 // build up a mock scanner stuff to fail the first time 238 Answer<ResultScanner> a = new Answer<ResultScanner>() { 239 int cnt = 0; 240 241 @Override 242 public ResultScanner answer(InvocationOnMock invocation) throws Throwable { 243 // first invocation return the busted mock scanner 244 if (cnt++ < failCnt) { 245 // create mock ResultScanner that always fails. 246 Scan scan = mock(Scan.class); 247 doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe 248 ResultScanner scanner = mock(ResultScanner.class); 249 250 invocation.callRealMethod(); // simulate NotServingRegionException 251 doThrow( 252 new NotServingRegionException("Injected simulated TimeoutException")) 253 .when(scanner).next(); 254 return scanner; 255 } 256 257 // otherwise return the real scanner. 258 return (ResultScanner) invocation.callRealMethod(); 259 } 260 }; 261 262 Table htable = spy(createTable(name)); 263 doAnswer(a).when(htable).getScanner((Scan) anyObject()); 264 return htable; 265 } 266 267 /** 268 * Run test assuming no errors using newer mapreduce api 269 * 270 * @throws IOException 271 * @throws InterruptedException 272 */ 273 @Test 274 public void testTableRecordReaderMapreduce() throws IOException, 275 InterruptedException { 276 Table table = createTable("table1-mr".getBytes()); 277 runTestMapreduce(table); 278 } 279 280 /** 281 * Run test assuming Scanner IOException failure using newer mapreduce api 282 * 283 * @throws IOException 284 * @throws InterruptedException 285 */ 286 @Test 287 public void testTableRecordReaderScannerFailMapreduce() throws IOException, 288 InterruptedException { 289 Table htable = createIOEScannerTable("table2-mr".getBytes(), 1); 290 runTestMapreduce(htable); 291 } 292 293 /** 294 * Run test assuming Scanner IOException failure using newer mapreduce api 295 * 296 * @throws IOException 297 * @throws InterruptedException 298 */ 299 @Test(expected = IOException.class) 300 public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException, 301 InterruptedException { 302 Table htable = createIOEScannerTable("table3-mr".getBytes(), 2); 303 runTestMapreduce(htable); 304 } 305 306 /** 307 * Run test assuming NotServingRegionException using newer mapreduce api 308 * 309 * @throws InterruptedException 310 * @throws org.apache.hadoop.hbase.DoNotRetryIOException 311 */ 312 @Test 313 public void testTableRecordReaderScannerTimeoutMapreduce() 314 throws IOException, InterruptedException { 315 Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1); 316 runTestMapreduce(htable); 317 } 318 319 /** 320 * Run test assuming NotServingRegionException using newer mapreduce api 321 * 322 * @throws InterruptedException 323 * @throws org.apache.hadoop.hbase.NotServingRegionException 324 */ 325 @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class) 326 public void testTableRecordReaderScannerTimeoutMapreduceTwice() 327 throws IOException, InterruptedException { 328 Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2); 329 runTestMapreduce(htable); 330 } 331 332 /** 333 * Verify the example we present in javadocs on TableInputFormatBase 334 */ 335 @Test 336 public void testExtensionOfTableInputFormatBase() 337 throws IOException, InterruptedException, ClassNotFoundException { 338 LOG.info("testing use of an InputFormat taht extends InputFormatBase"); 339 final Table htable = createTable(Bytes.toBytes("exampleTable"), 340 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }); 341 testInputFormat(ExampleTIF.class); 342 } 343 344 @Test 345 public void testJobConfigurableExtensionOfTableInputFormatBase() 346 throws IOException, InterruptedException, ClassNotFoundException { 347 LOG.info("testing use of an InputFormat taht extends InputFormatBase, " + 348 "using JobConfigurable."); 349 final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"), 350 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }); 351 testInputFormat(ExampleJobConfigurableTIF.class); 352 } 353 354 @Test 355 public void testDeprecatedExtensionOfTableInputFormatBase() 356 throws IOException, InterruptedException, ClassNotFoundException { 357 LOG.info("testing use of an InputFormat taht extends InputFormatBase, " + 358 "using the approach documented in 0.98."); 359 final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"), 360 new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }); 361 testInputFormat(ExampleDeprecatedTIF.class); 362 } 363 364 void testInputFormat(Class<? extends InputFormat> clazz) 365 throws IOException, InterruptedException, ClassNotFoundException { 366 final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration()); 367 job.setInputFormatClass(clazz); 368 job.setOutputFormatClass(NullOutputFormat.class); 369 job.setMapperClass(ExampleVerifier.class); 370 job.setNumReduceTasks(0); 371 372 LOG.debug("submitting job."); 373 assertTrue("job failed!", job.waitForCompletion(true)); 374 assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters() 375 .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue()); 376 assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters() 377 .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue()); 378 assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters() 379 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue()); 380 assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters() 381 .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue()); 382 assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters() 383 .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue()); 384 assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters() 385 .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue()); 386 } 387 388 public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> { 389 390 @Override 391 public void map(ImmutableBytesWritable key, Result value, Context context) 392 throws IOException { 393 for (Cell cell : value.listCells()) { 394 context.getCounter(TestTableInputFormat.class.getName() + ":row", 395 Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) 396 .increment(1l); 397 context.getCounter(TestTableInputFormat.class.getName() + ":family", 398 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength())) 399 .increment(1l); 400 context.getCounter(TestTableInputFormat.class.getName() + ":value", 401 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())) 402 .increment(1l); 403 } 404 } 405 406 } 407 408 public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable { 409 410 @Override 411 public void configure(JobConf job) { 412 try { 413 Connection connection = ConnectionFactory.createConnection(job); 414 Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable"))); 415 // mandatory 416 initializeTable(connection, exampleTable.getName()); 417 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), 418 Bytes.toBytes("columnB") }; 419 // optional 420 Scan scan = new Scan(); 421 for (byte[] family : inputColumns) { 422 scan.addFamily(family); 423 } 424 Filter exampleFilter = 425 new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); 426 scan.setFilter(exampleFilter); 427 setScan(scan); 428 } catch (IOException exception) { 429 throw new RuntimeException("Failed to configure for job.", exception); 430 } 431 } 432 433 } 434 435 436 public static class ExampleJobConfigurableTIF extends TableInputFormatBase 437 implements JobConfigurable { 438 439 @Override 440 public void configure(JobConf job) { 441 try { 442 Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job)); 443 TableName tableName = TableName.valueOf("exampleJobConfigurableTable"); 444 // mandatory 445 initializeTable(connection, tableName); 446 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), 447 Bytes.toBytes("columnB") }; 448 //optional 449 Scan scan = new Scan(); 450 for (byte[] family : inputColumns) { 451 scan.addFamily(family); 452 } 453 Filter exampleFilter = 454 new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); 455 scan.setFilter(exampleFilter); 456 setScan(scan); 457 } catch (IOException exception) { 458 throw new RuntimeException("Failed to initialize.", exception); 459 } 460 } 461 } 462 463 464 public static class ExampleTIF extends TableInputFormatBase { 465 466 @Override 467 protected void initialize(JobContext job) throws IOException { 468 Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create( 469 job.getConfiguration())); 470 TableName tableName = TableName.valueOf("exampleTable"); 471 // mandatory 472 initializeTable(connection, tableName); 473 byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"), 474 Bytes.toBytes("columnB") }; 475 //optional 476 Scan scan = new Scan(); 477 for (byte[] family : inputColumns) { 478 scan.addFamily(family); 479 } 480 Filter exampleFilter = 481 new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*")); 482 scan.setFilter(exampleFilter); 483 setScan(scan); 484 } 485 486 } 487} 488