/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapred;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This tests the TableInputFormat and its recovery semantics
 */
@Category({MapReduceTests.class, LargeTests.class})
public class TestTableInputFormat {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestTableInputFormat.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestTableInputFormat.class);

  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  static final byte[] FAMILY = Bytes.toBytes("family");

  private static final byte[][] columns = new byte[][] { FAMILY };

  @BeforeClass
  public static void beforeClass() throws Exception {
    UTIL.startMiniCluster();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  @Before
  public void before() throws IOException {
    LOG.info("before");
    UTIL.ensureSomeRegionServersAvailable(1);
    LOG.info("before done");
  }

  /**
   * Setup a table with two rows and values, using the single column family {@link #FAMILY}.
   *
   * @param tableName the name of the table to create
   * @return A Table instance for the created table.
   * @throws IOException if creating or populating the table fails
   */
  public static Table createTable(byte[] tableName) throws IOException {
    return createTable(tableName, new byte[][] { FAMILY });
  }

  /**
   * Setup a table with two rows ("aaa" and "bbb") and one value per column family.
   *
   * @param tableName the name of the table to create
   * @param families the column families to create; each row gets one cell per family
   * @return A Table instance for the created table.
   * @throws IOException if creating or populating the table fails
   */
  public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
    Table table = UTIL.createTable(TableName.valueOf(tableName), families);
    putRow(table, "aaa", "value aaa", families);
    putRow(table, "bbb", "value bbb", families);
    return table;
  }

  /**
   * Writes one row holding {@code value} under the null qualifier of every given family.
   * Bytes.toBytes is used instead of String.getBytes() so the stored bytes do not depend on the
   * platform default charset and stay consistent with the Bytes.toString decoding done by
   * {@link ExampleVerifier}.
   */
  private static void putRow(Table table, String row, String value, byte[][] families)
      throws IOException {
    Put put = new Put(Bytes.toBytes(row));
    byte[] valueBytes = Bytes.toBytes(value);
    for (byte[] family : families) {
      put.addColumn(family, null, valueBytes);
    }
    table.put(put);
  }

  /**
   * Verify that the result and key have expected values.
   *
   * @param r the result to inspect; its first value in {@link #FAMILY} is compared
   * @param key the key produced by the record reader
   * @param expectedKey the row key that {@code key} must equal
   * @param expectedValue the value that the first {@link #FAMILY} cell must hold
   * @return always {@code true}; a mismatch fails through a JUnit assertion instead
   */
  static boolean checkResult(Result r, ImmutableBytesWritable key,
      byte[] expectedKey, byte[] expectedValue) {
    assertEquals(0, key.compareTo(expectedKey));
    Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
    byte[] value = vals.values().iterator().next();
    assertTrue(Arrays.equals(value, expectedValue));
    return true; // if succeed
  }

  /**
   * Run the record reader from the o.a.h.hbase.mapred API over the given table and verify that
   * it yields exactly the rows "aaa" and "bbb", in that order, and then reports no more data.
   *
   * @param table the table to scan; expected to hold the rows written by
   *          {@link #createTable(byte[])}
   * @throws IOException if the record reader fails
   */
  static void runTestMapred(Table table) throws IOException {
    org.apache.hadoop.hbase.mapred.TableRecordReader trr =
        new org.apache.hadoop.hbase.mapred.TableRecordReader();
    trr.setStartRow(Bytes.toBytes("aaa"));
    trr.setEndRow(Bytes.toBytes("zzz"));
    trr.setHTable(table);
    trr.setInputColumns(columns);

    trr.init();
    Result r = new Result();
    ImmutableBytesWritable key = new ImmutableBytesWritable();

    boolean more = trr.next(key, r);
    assertTrue(more);
    checkResult(r, key, Bytes.toBytes("aaa"), Bytes.toBytes("value aaa"));

    more = trr.next(key, r);
    assertTrue(more);
    checkResult(r, key, Bytes.toBytes("bbb"), Bytes.toBytes("value bbb"));

    // no more data
    more = trr.next(key, r);
    assertFalse(more);
  }

  /**
   * Create a table whose first {@code failCnt} scanners throw an IOException on next().
   *
   * @param name the name of the table to create
   * @param failCnt how many getScanner calls return a failing mock before real scanners are
   *          handed out
   * @return a Mockito spy of the created table with getScanner(Scan) intercepted
   * @throws IOException if creating the underlying table fails
   */
  static Table createIOEScannerTable(byte[] name, final int failCnt)
      throws IOException {
    // build up a mock scanner stuff to fail the first failCnt times
    Answer<ResultScanner> a = new Answer<ResultScanner>() {
      int cnt = 0;

      @Override
      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
        // the first failCnt invocations return the busted mock scanner
        if (cnt++ < failCnt) {
          // create mock ResultScanner that always fails.
          ResultScanner scanner = mock(ResultScanner.class);
          // simulate TimeoutException / IOException
          doThrow(new IOException("Injected exception")).when(scanner).next();
          return scanner;
        }

        // otherwise return the real scanner.
        return (ResultScanner) invocation.callRealMethod();
      }
    };

    Table htable = spy(createTable(name));
    doAnswer(a).when(htable).getScanner((Scan) anyObject());
    return htable;
  }

  /**
   * Create a table whose first {@code failCnt} scanners throw a NotServingRegionException on
   * next().
   *
   * @param name the name of the table to create
   * @param failCnt how many getScanner calls return a failing mock before real scanners are
   *          handed out
   * @return a Mockito spy of the created table with getScanner(Scan) intercepted
   * @throws IOException if creating the underlying table fails
   */
  static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
      throws IOException {
    // build up a mock scanner stuff to fail the first failCnt times
    Answer<ResultScanner> a = new Answer<ResultScanner>() {
      int cnt = 0;

      @Override
      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
        // the first failCnt invocations return the busted mock scanner
        if (cnt++ < failCnt) {
          // create mock ResultScanner that always fails.
          ResultScanner scanner = mock(ResultScanner.class);

          // NOTE(review): the real getScanner is still invoked here and its result is
          // discarded (and never closed); kept as in the original -- confirm it is needed.
          invocation.callRealMethod(); // simulate NotServingRegionException
          doThrow(
              new NotServingRegionException("Injected simulated TimeoutException"))
              .when(scanner).next();
          return scanner;
        }

        // otherwise return the real scanner.
        return (ResultScanner) invocation.callRealMethod();
      }
    };

    Table htable = spy(createTable(name));
    doAnswer(a).when(htable).getScanner((Scan) anyObject());
    return htable;
  }

  /**
   * Run test assuming no errors using mapred api.
   *
   * @throws IOException if the table cannot be created or read
   */
  @Test
  public void testTableRecordReader() throws IOException {
    try (Table table = createTable(Bytes.toBytes("table1"))) {
      runTestMapred(table);
    }
  }

  /**
   * Run test assuming a single Scanner IOException failure using mapred api; the read is
   * expected to succeed anyway.
   *
   * @throws IOException if the table cannot be created or read
   */
  @Test
  public void testTableRecordReaderScannerFail() throws IOException {
    try (Table htable = createIOEScannerTable(Bytes.toBytes("table2"), 1)) {
      runTestMapred(htable);
    }
  }

  /**
   * Run test assuming two consecutive Scanner IOException failures using mapred api; the
   * IOException is expected to reach the caller.
   *
   * @throws IOException expected
   */
  @Test(expected = IOException.class)
  public void testTableRecordReaderScannerFailTwice() throws IOException {
    try (Table htable = createIOEScannerTable(Bytes.toBytes("table3"), 2)) {
      runTestMapred(htable);
    }
  }

  /**
   * Run test assuming a single NotServingRegionException using mapred api; the read is expected
   * to succeed anyway.
   *
   * @throws IOException if the table cannot be created or read
   */
  @Test
  public void testTableRecordReaderScannerTimeout() throws IOException {
    try (Table htable = createDNRIOEScannerTable(Bytes.toBytes("table4"), 1)) {
      runTestMapred(htable);
    }
  }

  /**
   * Run test assuming two consecutive NotServingRegionExceptions using mapred api; the
   * exception is expected to reach the caller.
   *
   * @throws IOException expected, as a NotServingRegionException
   */
  @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
  public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
    try (Table htable = createDNRIOEScannerTable(Bytes.toBytes("table5"), 2)) {
      runTestMapred(htable);
    }
  }

  /**
   * Verify the example we present in javadocs on TableInputFormatBase
   */
  @Test
  public void testExtensionOfTableInputFormatBase() throws IOException {
    LOG.info("testing use of an InputFormat taht extends InputFormatBase");
    // Only the created and populated table is needed; the input format opens its own
    // connection, so the handle is released right away.
    createTable(Bytes.toBytes("exampleTable"),
      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }).close();
    testInputFormat(ExampleTIF.class);
  }

  /**
   * Same verification as {@link #testExtensionOfTableInputFormatBase()}, but with an input
   * format that sets itself up in {@code configure(JobConf)}.
   */
  @Test
  public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
    LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
        + "as it was given in 0.98.");
    // Only the created and populated table is needed; see testExtensionOfTableInputFormatBase.
    createTable(Bytes.toBytes("exampleDeprecatedTable"),
      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }).close();
    testInputFormat(ExampleDeprecatedTIF.class);
  }

  /**
   * Same verification as {@link #testExtensionOfTableInputFormatBase()}, but with an input
   * format that additionally implements JobConfigurable.
   */
  @Test
  public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
    LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
        + "using JobConfigurable.");
    // Only the created and populated table is needed; see testExtensionOfTableInputFormatBase.
    createTable(Bytes.toBytes("exampleJobConfigurableTable"),
      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") }).close();
    testInputFormat(ExampleJobConfigurableTIF.class);
  }

  /**
   * Runs a map-only job with the given input format and {@link ExampleVerifier} as mapper, then
   * checks the counters the mapper produced: only row "aaa" may be seen, once per column family.
   *
   * @param clazz the input format under test
   * @throws IOException if the job cannot be run or its counters cannot be fetched
   */
  void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
    Configuration conf = UTIL.getConfiguration();
    final JobConf job = new JobConf(conf);
    job.setInputFormat(clazz);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ExampleVerifier.class);
    job.setNumReduceTasks(0);
    LOG.debug("submitting job.");
    final RunningJob run = JobClient.runJob(job);
    assertTrue("job failed!", run.isSuccessful());

    // Counter groups must match the names used by ExampleVerifier#map.
    final String rowGroup = TestTableInputFormat.class.getName() + ":row";
    final String familyGroup = TestTableInputFormat.class.getName() + ":family";
    final String valueGroup = TestTableInputFormat.class.getName() + ":value";

    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
      run.getCounters().findCounter(rowGroup, "aaa").getCounter());
    assertEquals("Saw any instances of the filtered out row.", 0,
      run.getCounters().findCounter(rowGroup, "bbb").getCounter());
    assertEquals("Saw the wrong number of instances of columnA.", 1,
      run.getCounters().findCounter(familyGroup, "columnA").getCounter());
    assertEquals("Saw the wrong number of instances of columnB.", 1,
      run.getCounters().findCounter(familyGroup, "columnB").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
      run.getCounters().findCounter(valueGroup, "value aaa").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
      run.getCounters().findCounter(valueGroup, "value bbb").getCounter());
  }

  /**
   * Mapper that emits nothing and instead bumps one counter per cell for the cell's row, family
   * and value, so that the submitting test can verify what the input format delivered.
   */
  public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {

    @Override
    public void configure(JobConf conf) {
    }

    @Override
    public void map(ImmutableBytesWritable key, Result value,
        OutputCollector<NullWritable,NullWritable> output,
        Reporter reporter) throws IOException {
      for (Cell cell : value.listCells()) {
        reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
            Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
            .increment(1L);
        reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
            Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
            .increment(1L);
        reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
            Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
            .increment(1L);
      }
    }

    @Override
    public void close() {
    }

  }

  /**
   * Example input format that performs all of its setup in {@link #configure(JobConf)}: it reads
   * columnA and columnB of "exampleDeprecatedTable" and keeps only rows matching "aa.*".
   */
  public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {

    @Override
    public void configure(JobConf job) {
      try {
        Connection connection = ConnectionFactory.createConnection(job);
        TableName exampleTableName;
        // The Table handle is only needed to obtain the name, so release it immediately.
        try (Table exampleTable =
            connection.getTable(TableName.valueOf("exampleDeprecatedTable"))) {
          exampleTableName = exampleTable.getName();
        }
        // mandatory
        initializeTable(connection, exampleTableName);
        byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
          Bytes.toBytes("columnB") };
        // mandatory
        setInputColumns(inputColumns);
        Filter exampleFilter =
          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
        // optional
        setRowFilter(exampleFilter);
      } catch (IOException exception) {
        throw new RuntimeException("Failed to configure for job.", exception);
      }
    }

  }

  /**
   * Variant of {@link ExampleTIF} that targets "exampleJobConfigurableTable" and also triggers
   * its initialization from {@link #configure(JobConf)}.
   */
  public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {

    @Override
    public void configure(JobConf job) {
      try {
        initialize(job);
      } catch (IOException exception) {
        throw new RuntimeException("Failed to initialize.", exception);
      }
    }

    @Override
    protected void initialize(JobConf job) throws IOException {
      initialize(job, "exampleJobConfigurableTable");
    }
  }


  /**
   * Example input format that reads columnA and columnB of "exampleTable" and keeps only rows
   * matching "aa.*".
   */
  public static class ExampleTIF extends TableInputFormatBase {

    @Override
    protected void initialize(JobConf job) throws IOException {
      initialize(job, "exampleTable");
    }

    /**
     * Connects to the cluster described by {@code job} and configures this input format for the
     * named table.
     *
     * @param job the job configuration used to create the connection
     * @param table the name of the table to read
     * @throws IOException if the connection or the table initialization fails
     */
    protected void initialize(JobConf job, String table) throws IOException {
      Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
      TableName tableName = TableName.valueOf(table);
      // mandatory
      initializeTable(connection, tableName);
      byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
        Bytes.toBytes("columnB") };
      // mandatory
      setInputColumns(inputColumns);
      Filter exampleFilter =
        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
      // optional
      setRowFilter(exampleFilter);
    }

  }

}