001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023import static org.mockito.Matchers.anyObject;
024import static org.mockito.Mockito.doAnswer;
025import static org.mockito.Mockito.doReturn;
026import static org.mockito.Mockito.doThrow;
027import static org.mockito.Mockito.mock;
028import static org.mockito.Mockito.spy;
029
030import java.io.IOException;
031import java.util.Arrays;
032import java.util.Map;
033import org.apache.hadoop.hbase.*;
034import org.apache.hadoop.hbase.HBaseClassTestRule;
035import org.apache.hadoop.hbase.client.Connection;
036import org.apache.hadoop.hbase.client.ConnectionFactory;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.Result;
039import org.apache.hadoop.hbase.client.ResultScanner;
040import org.apache.hadoop.hbase.client.Scan;
041import org.apache.hadoop.hbase.client.Table;
042import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
043import org.apache.hadoop.hbase.filter.Filter;
044import org.apache.hadoop.hbase.filter.RegexStringComparator;
045import org.apache.hadoop.hbase.filter.RowFilter;
046import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.io.NullWritable;
050import org.apache.hadoop.mapred.JobConf;
051import org.apache.hadoop.mapred.JobConfigurable;
052import org.apache.hadoop.mapred.MiniMRCluster;
053import org.apache.hadoop.mapreduce.InputFormat;
054import org.apache.hadoop.mapreduce.Job;
055import org.apache.hadoop.mapreduce.JobContext;
056import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
057import org.junit.AfterClass;
058import org.junit.Before;
059import org.junit.BeforeClass;
060import org.junit.ClassRule;
061import org.junit.Test;
062import org.junit.experimental.categories.Category;
063import org.mockito.invocation.InvocationOnMock;
064import org.mockito.stubbing.Answer;
065import org.slf4j.Logger;
066import org.slf4j.LoggerFactory;
067
068/**
069 * This tests the TableInputFormat and its recovery semantics
070 *
071 */
072@Category(LargeTests.class)
073public class TestTableInputFormat {
074
075  @ClassRule
076  public static final HBaseClassTestRule CLASS_RULE =
077      HBaseClassTestRule.forClass(TestTableInputFormat.class);
078
079  private static final Logger LOG = LoggerFactory.getLogger(TestTableInputFormat.class);
080
081  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
082  private static MiniMRCluster mrCluster;
083  static final byte[] FAMILY = Bytes.toBytes("family");
084
085  private static final byte[][] columns = new byte[][] { FAMILY };
086
087  @BeforeClass
088  public static void beforeClass() throws Exception {
089    UTIL.startMiniCluster();
090  }
091
092  @AfterClass
093  public static void afterClass() throws Exception {
094    UTIL.shutdownMiniCluster();
095  }
096
097  @Before
098  public void before() throws IOException {
099    LOG.info("before");
100    UTIL.ensureSomeRegionServersAvailable(1);
101    LOG.info("before done");
102  }
103
104  /**
105   * Setup a table with two rows and values.
106   *
107   * @param tableName
108   * @return A Table instance for the created table.
109   * @throws IOException
110   */
111  public static Table createTable(byte[] tableName) throws IOException {
112    return createTable(tableName, new byte[][] { FAMILY });
113  }
114
115  /**
116   * Setup a table with two rows and values per column family.
117   *
118   * @param tableName
119   * @return A Table instance for the created table.
120   * @throws IOException
121   */
122  public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
123    Table table = UTIL.createTable(TableName.valueOf(tableName), families);
124    Put p = new Put("aaa".getBytes());
125    for (byte[] family : families) {
126      p.addColumn(family, null, "value aaa".getBytes());
127    }
128    table.put(p);
129    p = new Put("bbb".getBytes());
130    for (byte[] family : families) {
131      p.addColumn(family, null, "value bbb".getBytes());
132    }
133    table.put(p);
134    return table;
135  }
136
137  /**
138   * Verify that the result and key have expected values.
139   *
140   * @param r
141   * @param key
142   * @param expectedKey
143   * @param expectedValue
144   * @return
145   */
146  static boolean checkResult(Result r, ImmutableBytesWritable key,
147      byte[] expectedKey, byte[] expectedValue) {
148    assertEquals(0, key.compareTo(expectedKey));
149    Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
150    byte[] value = vals.values().iterator().next();
151    assertTrue(Arrays.equals(value, expectedValue));
152    return true; // if succeed
153  }
154
155  /**
156   * Create table data and run tests on specified htable using the
157   * o.a.h.hbase.mapreduce API.
158   *
159   * @param table
160   * @throws IOException
161   * @throws InterruptedException
162   */
163  static void runTestMapreduce(Table table) throws IOException,
164      InterruptedException {
165    org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl trr =
166        new org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl();
167    Scan s = new Scan();
168    s.setStartRow("aaa".getBytes());
169    s.setStopRow("zzz".getBytes());
170    s.addFamily(FAMILY);
171    trr.setScan(s);
172    trr.setHTable(table);
173
174    trr.initialize(null, null);
175    Result r = new Result();
176    ImmutableBytesWritable key = new ImmutableBytesWritable();
177
178    boolean more = trr.nextKeyValue();
179    assertTrue(more);
180    key = trr.getCurrentKey();
181    r = trr.getCurrentValue();
182    checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
183
184    more = trr.nextKeyValue();
185    assertTrue(more);
186    key = trr.getCurrentKey();
187    r = trr.getCurrentValue();
188    checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
189
190    // no more data
191    more = trr.nextKeyValue();
192    assertFalse(more);
193  }
194
195  /**
196   * Create a table that IOE's on first scanner next call
197   *
198   * @throws IOException
199   */
200  static Table createIOEScannerTable(byte[] name, final int failCnt)
201      throws IOException {
202    // build up a mock scanner stuff to fail the first time
203    Answer<ResultScanner> a = new Answer<ResultScanner>() {
204      int cnt = 0;
205
206      @Override
207      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
208        // first invocation return the busted mock scanner
209        if (cnt++ < failCnt) {
210          // create mock ResultScanner that always fails.
211          Scan scan = mock(Scan.class);
212          doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
213          ResultScanner scanner = mock(ResultScanner.class);
214          // simulate TimeoutException / IOException
215          doThrow(new IOException("Injected exception")).when(scanner).next();
216          return scanner;
217        }
218
219        // otherwise return the real scanner.
220        return (ResultScanner) invocation.callRealMethod();
221      }
222    };
223
224    Table htable = spy(createTable(name));
225    doAnswer(a).when(htable).getScanner((Scan) anyObject());
226    return htable;
227  }
228
229  /**
230   * Create a table that throws a NotServingRegionException on first scanner
231   * next call
232   *
233   * @throws IOException
234   */
235  static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
236      throws IOException {
237    // build up a mock scanner stuff to fail the first time
238    Answer<ResultScanner> a = new Answer<ResultScanner>() {
239      int cnt = 0;
240
241      @Override
242      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
243        // first invocation return the busted mock scanner
244        if (cnt++ < failCnt) {
245          // create mock ResultScanner that always fails.
246          Scan scan = mock(Scan.class);
247          doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
248          ResultScanner scanner = mock(ResultScanner.class);
249
250          invocation.callRealMethod(); // simulate NotServingRegionException
251          doThrow(
252              new NotServingRegionException("Injected simulated TimeoutException"))
253              .when(scanner).next();
254          return scanner;
255        }
256
257        // otherwise return the real scanner.
258        return (ResultScanner) invocation.callRealMethod();
259      }
260    };
261
262    Table htable = spy(createTable(name));
263    doAnswer(a).when(htable).getScanner((Scan) anyObject());
264    return htable;
265  }
266
267  /**
268   * Run test assuming no errors using newer mapreduce api
269   *
270   * @throws IOException
271   * @throws InterruptedException
272   */
273  @Test
274  public void testTableRecordReaderMapreduce() throws IOException,
275      InterruptedException {
276    Table table = createTable("table1-mr".getBytes());
277    runTestMapreduce(table);
278  }
279
280  /**
281   * Run test assuming Scanner IOException failure using newer mapreduce api
282   *
283   * @throws IOException
284   * @throws InterruptedException
285   */
286  @Test
287  public void testTableRecordReaderScannerFailMapreduce() throws IOException,
288      InterruptedException {
289    Table htable = createIOEScannerTable("table2-mr".getBytes(), 1);
290    runTestMapreduce(htable);
291  }
292
293  /**
294   * Run test assuming Scanner IOException failure using newer mapreduce api
295   *
296   * @throws IOException
297   * @throws InterruptedException
298   */
299  @Test(expected = IOException.class)
300  public void testTableRecordReaderScannerFailMapreduceTwice() throws IOException,
301      InterruptedException {
302    Table htable = createIOEScannerTable("table3-mr".getBytes(), 2);
303    runTestMapreduce(htable);
304  }
305
306  /**
307   * Run test assuming NotServingRegionException using newer mapreduce api
308   *
309   * @throws InterruptedException
310   * @throws org.apache.hadoop.hbase.DoNotRetryIOException
311   */
312  @Test
313  public void testTableRecordReaderScannerTimeoutMapreduce()
314      throws IOException, InterruptedException {
315    Table htable = createDNRIOEScannerTable("table4-mr".getBytes(), 1);
316    runTestMapreduce(htable);
317  }
318
319  /**
320   * Run test assuming NotServingRegionException using newer mapreduce api
321   *
322   * @throws InterruptedException
323   * @throws org.apache.hadoop.hbase.NotServingRegionException
324   */
325  @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
326  public void testTableRecordReaderScannerTimeoutMapreduceTwice()
327      throws IOException, InterruptedException {
328    Table htable = createDNRIOEScannerTable("table5-mr".getBytes(), 2);
329    runTestMapreduce(htable);
330  }
331
332  /**
333   * Verify the example we present in javadocs on TableInputFormatBase
334   */
335  @Test
336  public void testExtensionOfTableInputFormatBase()
337      throws IOException, InterruptedException, ClassNotFoundException {
338    LOG.info("testing use of an InputFormat taht extends InputFormatBase");
339    final Table htable = createTable(Bytes.toBytes("exampleTable"),
340      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
341    testInputFormat(ExampleTIF.class);
342  }
343
344  @Test
345  public void testJobConfigurableExtensionOfTableInputFormatBase()
346      throws IOException, InterruptedException, ClassNotFoundException {
347    LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
348        "using JobConfigurable.");
349    final Table htable = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
350      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
351    testInputFormat(ExampleJobConfigurableTIF.class);
352  }
353
354  @Test
355  public void testDeprecatedExtensionOfTableInputFormatBase()
356      throws IOException, InterruptedException, ClassNotFoundException {
357    LOG.info("testing use of an InputFormat taht extends InputFormatBase, " +
358        "using the approach documented in 0.98.");
359    final Table htable = createTable(Bytes.toBytes("exampleDeprecatedTable"),
360      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
361    testInputFormat(ExampleDeprecatedTIF.class);
362  }
363
364  void testInputFormat(Class<? extends InputFormat> clazz)
365      throws IOException, InterruptedException, ClassNotFoundException {
366    final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
367    job.setInputFormatClass(clazz);
368    job.setOutputFormatClass(NullOutputFormat.class);
369    job.setMapperClass(ExampleVerifier.class);
370    job.setNumReduceTasks(0);
371
372    LOG.debug("submitting job.");
373    assertTrue("job failed!", job.waitForCompletion(true));
374    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
375        .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
376    assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
377        .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
378    assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
379        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
380    assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
381        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
382    assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
383        .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
384    assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
385        .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
386  }
387
388  public static class ExampleVerifier extends TableMapper<NullWritable, NullWritable> {
389
390    @Override
391    public void map(ImmutableBytesWritable key, Result value, Context context)
392        throws IOException {
393      for (Cell cell : value.listCells()) {
394        context.getCounter(TestTableInputFormat.class.getName() + ":row",
395            Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
396            .increment(1l);
397        context.getCounter(TestTableInputFormat.class.getName() + ":family",
398            Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
399            .increment(1l);
400        context.getCounter(TestTableInputFormat.class.getName() + ":value",
401            Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
402            .increment(1l);
403      }
404    }
405
406  }
407
408  public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
409
410    @Override
411    public void configure(JobConf job) {
412      try {
413        Connection connection = ConnectionFactory.createConnection(job);
414        Table exampleTable = connection.getTable(TableName.valueOf(("exampleDeprecatedTable")));
415        // mandatory
416        initializeTable(connection, exampleTable.getName());
417        byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
418          Bytes.toBytes("columnB") };
419        // optional
420        Scan scan = new Scan();
421        for (byte[] family : inputColumns) {
422          scan.addFamily(family);
423        }
424        Filter exampleFilter =
425          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
426        scan.setFilter(exampleFilter);
427        setScan(scan);
428      } catch (IOException exception) {
429        throw new RuntimeException("Failed to configure for job.", exception);
430      }
431    }
432
433  }
434
435
436  public static class ExampleJobConfigurableTIF extends TableInputFormatBase
437      implements JobConfigurable {
438
439    @Override
440    public void configure(JobConf job) {
441      try {
442        Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
443        TableName tableName = TableName.valueOf("exampleJobConfigurableTable");
444        // mandatory
445        initializeTable(connection, tableName);
446        byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
447          Bytes.toBytes("columnB") };
448        //optional
449        Scan scan = new Scan();
450        for (byte[] family : inputColumns) {
451          scan.addFamily(family);
452        }
453        Filter exampleFilter =
454          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
455        scan.setFilter(exampleFilter);
456        setScan(scan);
457      } catch (IOException exception) {
458        throw new RuntimeException("Failed to initialize.", exception);
459      }
460    }
461  }
462
463
464  public static class ExampleTIF extends TableInputFormatBase {
465
466    @Override
467    protected void initialize(JobContext job) throws IOException {
468      Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(
469          job.getConfiguration()));
470      TableName tableName = TableName.valueOf("exampleTable");
471      // mandatory
472      initializeTable(connection, tableName);
473      byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
474        Bytes.toBytes("columnB") };
475      //optional
476      Scan scan = new Scan();
477      for (byte[] family : inputColumns) {
478        scan.addFamily(family);
479      }
480      Filter exampleFilter =
481        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
482      scan.setFilter(exampleFilter);
483      setScan(scan);
484    }
485
486  }
487}
488