001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapred;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023import static org.mockito.Matchers.anyObject;
024import static org.mockito.Mockito.doAnswer;
025import static org.mockito.Mockito.doReturn;
026import static org.mockito.Mockito.doThrow;
027import static org.mockito.Mockito.mock;
028import static org.mockito.Mockito.spy;
029
030import java.io.IOException;
031import java.util.Arrays;
032import java.util.Map;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.hbase.*;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.HBaseClassTestRule;
037import org.apache.hadoop.hbase.client.Connection;
038import org.apache.hadoop.hbase.client.ConnectionFactory;
039import org.apache.hadoop.hbase.client.Put;
040import org.apache.hadoop.hbase.client.Result;
041import org.apache.hadoop.hbase.client.ResultScanner;
042import org.apache.hadoop.hbase.client.Scan;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
045import org.apache.hadoop.hbase.filter.Filter;
046import org.apache.hadoop.hbase.filter.RegexStringComparator;
047import org.apache.hadoop.hbase.filter.RowFilter;
048import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
049import org.apache.hadoop.hbase.testclassification.LargeTests;
050import org.apache.hadoop.hbase.testclassification.MapReduceTests;
051import org.apache.hadoop.hbase.util.Bytes;
052import org.apache.hadoop.io.NullWritable;
053import org.apache.hadoop.mapred.InputFormat;
054import org.apache.hadoop.mapred.JobClient;
055import org.apache.hadoop.mapred.JobConf;
056import org.apache.hadoop.mapred.JobConfigurable;
057import org.apache.hadoop.mapred.OutputCollector;
058import org.apache.hadoop.mapred.Reporter;
059import org.apache.hadoop.mapred.RunningJob;
060import org.apache.hadoop.mapred.lib.NullOutputFormat;
061import org.junit.AfterClass;
062import org.junit.Before;
063import org.junit.BeforeClass;
064import org.junit.ClassRule;
065import org.junit.Test;
066import org.junit.experimental.categories.Category;
067import org.mockito.invocation.InvocationOnMock;
068import org.mockito.stubbing.Answer;
069import org.slf4j.Logger;
070import org.slf4j.LoggerFactory;
071
072/**
073 * This tests the TableInputFormat and its recovery semantics
074 */
075@Category({MapReduceTests.class, LargeTests.class})
076public class TestTableInputFormat {
077
078  @ClassRule
079  public static final HBaseClassTestRule CLASS_RULE =
080      HBaseClassTestRule.forClass(TestTableInputFormat.class);
081
082  private static final Logger LOG = LoggerFactory.getLogger(TestTableInputFormat.class);
083
084  private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
085
086  static final byte[] FAMILY = Bytes.toBytes("family");
087
088  private static final byte[][] columns = new byte[][] { FAMILY };
089
090  @BeforeClass
091  public static void beforeClass() throws Exception {
092    UTIL.startMiniCluster();
093  }
094
095  @AfterClass
096  public static void afterClass() throws Exception {
097    UTIL.shutdownMiniCluster();
098  }
099
100  @Before
101  public void before() throws IOException {
102    LOG.info("before");
103    UTIL.ensureSomeRegionServersAvailable(1);
104    LOG.info("before done");
105  }
106
107  /**
108   * Setup a table with two rows and values.
109   *
110   * @param tableName the name of the table to create
111   * @return A Table instance for the created table.
112   * @throws IOException
113   */
114  public static Table createTable(byte[] tableName) throws IOException {
115    return createTable(tableName, new byte[][] { FAMILY });
116  }
117
118  /**
119   * Setup a table with two rows and values per column family.
120   *
121   * @param tableName
122   * @return A Table instance for the created table.
123   * @throws IOException
124   */
125  public static Table createTable(byte[] tableName, byte[][] families) throws IOException {
126    Table table = UTIL.createTable(TableName.valueOf(tableName), families);
127    Put p = new Put("aaa".getBytes());
128    for (byte[] family : families) {
129      p.addColumn(family, null, "value aaa".getBytes());
130    }
131    table.put(p);
132    p = new Put("bbb".getBytes());
133    for (byte[] family : families) {
134      p.addColumn(family, null, "value bbb".getBytes());
135    }
136    table.put(p);
137    return table;
138  }
139
140  /**
141   * Verify that the result and key have expected values.
142   *
143   * @param r
144   * @param key
145   * @param expectedKey
146   * @param expectedValue
147   * @return
148   */
149  static boolean checkResult(Result r, ImmutableBytesWritable key,
150      byte[] expectedKey, byte[] expectedValue) {
151    assertEquals(0, key.compareTo(expectedKey));
152    Map<byte[], byte[]> vals = r.getFamilyMap(FAMILY);
153    byte[] value = vals.values().iterator().next();
154    assertTrue(Arrays.equals(value, expectedValue));
155    return true; // if succeed
156  }
157
158  /**
159   * Create table data and run tests on specified htable using the
160   * o.a.h.hbase.mapred API.
161   *
162   * @param table
163   * @throws IOException
164   */
165  static void runTestMapred(Table table) throws IOException {
166    org.apache.hadoop.hbase.mapred.TableRecordReader trr =
167        new org.apache.hadoop.hbase.mapred.TableRecordReader();
168    trr.setStartRow("aaa".getBytes());
169    trr.setEndRow("zzz".getBytes());
170    trr.setHTable(table);
171    trr.setInputColumns(columns);
172
173    trr.init();
174    Result r = new Result();
175    ImmutableBytesWritable key = new ImmutableBytesWritable();
176
177    boolean more = trr.next(key, r);
178    assertTrue(more);
179    checkResult(r, key, "aaa".getBytes(), "value aaa".getBytes());
180
181    more = trr.next(key, r);
182    assertTrue(more);
183    checkResult(r, key, "bbb".getBytes(), "value bbb".getBytes());
184
185    // no more data
186    more = trr.next(key, r);
187    assertFalse(more);
188  }
189
190  /**
191   * Create a table that IOE's on first scanner next call
192   *
193   * @throws IOException
194   */
195  static Table createIOEScannerTable(byte[] name, final int failCnt)
196      throws IOException {
197    // build up a mock scanner stuff to fail the first time
198    Answer<ResultScanner> a = new Answer<ResultScanner>() {
199      int cnt = 0;
200
201      @Override
202      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
203        // first invocation return the busted mock scanner
204        if (cnt++ < failCnt) {
205          // create mock ResultScanner that always fails.
206          Scan scan = mock(Scan.class);
207          doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
208          ResultScanner scanner = mock(ResultScanner.class);
209          // simulate TimeoutException / IOException
210          doThrow(new IOException("Injected exception")).when(scanner).next();
211          return scanner;
212        }
213
214        // otherwise return the real scanner.
215        return (ResultScanner) invocation.callRealMethod();
216      }
217    };
218
219    Table htable = spy(createTable(name));
220    doAnswer(a).when(htable).getScanner((Scan) anyObject());
221    return htable;
222  }
223
224  /**
225   * Create a table that throws a DoNoRetryIOException on first scanner next
226   * call
227   *
228   * @throws IOException
229   */
230  static Table createDNRIOEScannerTable(byte[] name, final int failCnt)
231      throws IOException {
232    // build up a mock scanner stuff to fail the first time
233    Answer<ResultScanner> a = new Answer<ResultScanner>() {
234      int cnt = 0;
235
236      @Override
237      public ResultScanner answer(InvocationOnMock invocation) throws Throwable {
238        // first invocation return the busted mock scanner
239        if (cnt++ < failCnt) {
240          // create mock ResultScanner that always fails.
241          Scan scan = mock(Scan.class);
242          doReturn("bogus".getBytes()).when(scan).getStartRow(); // avoid npe
243          ResultScanner scanner = mock(ResultScanner.class);
244
245          invocation.callRealMethod(); // simulate NotServingRegionException
246          doThrow(
247              new NotServingRegionException("Injected simulated TimeoutException"))
248              .when(scanner).next();
249          return scanner;
250        }
251
252        // otherwise return the real scanner.
253        return (ResultScanner) invocation.callRealMethod();
254      }
255    };
256
257    Table htable = spy(createTable(name));
258    doAnswer(a).when(htable).getScanner((Scan) anyObject());
259    return htable;
260  }
261
262  /**
263   * Run test assuming no errors using mapred api.
264   *
265   * @throws IOException
266   */
267  @Test
268  public void testTableRecordReader() throws IOException {
269    Table table = createTable("table1".getBytes());
270    runTestMapred(table);
271  }
272
273  /**
274   * Run test assuming Scanner IOException failure using mapred api,
275   *
276   * @throws IOException
277   */
278  @Test
279  public void testTableRecordReaderScannerFail() throws IOException {
280    Table htable = createIOEScannerTable("table2".getBytes(), 1);
281    runTestMapred(htable);
282  }
283
284  /**
285   * Run test assuming Scanner IOException failure using mapred api,
286   *
287   * @throws IOException
288   */
289  @Test(expected = IOException.class)
290  public void testTableRecordReaderScannerFailTwice() throws IOException {
291    Table htable = createIOEScannerTable("table3".getBytes(), 2);
292    runTestMapred(htable);
293  }
294
295  /**
296   * Run test assuming NotServingRegionException using mapred api.
297   *
298   * @throws org.apache.hadoop.hbase.DoNotRetryIOException
299   */
300  @Test
301  public void testTableRecordReaderScannerTimeout() throws IOException {
302    Table htable = createDNRIOEScannerTable("table4".getBytes(), 1);
303    runTestMapred(htable);
304  }
305
306  /**
307   * Run test assuming NotServingRegionException using mapred api.
308   *
309   * @throws org.apache.hadoop.hbase.DoNotRetryIOException
310   */
311  @Test(expected = org.apache.hadoop.hbase.NotServingRegionException.class)
312  public void testTableRecordReaderScannerTimeoutTwice() throws IOException {
313    Table htable = createDNRIOEScannerTable("table5".getBytes(), 2);
314    runTestMapred(htable);
315  }
316
317  /**
318   * Verify the example we present in javadocs on TableInputFormatBase
319   */
320  @Test
321  public void testExtensionOfTableInputFormatBase() throws IOException {
322    LOG.info("testing use of an InputFormat taht extends InputFormatBase");
323    final Table table = createTable(Bytes.toBytes("exampleTable"),
324      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
325    testInputFormat(ExampleTIF.class);
326  }
327
328  @Test
329  public void testDeprecatedExtensionOfTableInputFormatBase() throws IOException {
330    LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
331        + "as it was given in 0.98.");
332    final Table table = createTable(Bytes.toBytes("exampleDeprecatedTable"),
333      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
334    testInputFormat(ExampleDeprecatedTIF.class);
335  }
336
337  @Test
338  public void testJobConfigurableExtensionOfTableInputFormatBase() throws IOException {
339    LOG.info("testing use of an InputFormat taht extends InputFormatBase, "
340        + "using JobConfigurable.");
341    final Table table = createTable(Bytes.toBytes("exampleJobConfigurableTable"),
342      new byte[][] { Bytes.toBytes("columnA"), Bytes.toBytes("columnB") });
343    testInputFormat(ExampleJobConfigurableTIF.class);
344  }
345
346  void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
347    Configuration conf = UTIL.getConfiguration();
348    final JobConf job = new JobConf(conf);
349    job.setInputFormat(clazz);
350    job.setOutputFormat(NullOutputFormat.class);
351    job.setMapperClass(ExampleVerifier.class);
352    job.setNumReduceTasks(0);
353    LOG.debug("submitting job.");
354    final RunningJob run = JobClient.runJob(job);
355    assertTrue("job failed!", run.isSuccessful());
356    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
357        .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
358    assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
359        .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
360    assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
361        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
362    assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
363        .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
364    assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
365        .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
366    assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
367        .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
368  }
369
370  public static class ExampleVerifier implements TableMap<NullWritable, NullWritable> {
371
372    @Override
373    public void configure(JobConf conf) {
374    }
375
376    @Override
377    public void map(ImmutableBytesWritable key, Result value,
378        OutputCollector<NullWritable,NullWritable> output,
379        Reporter reporter) throws IOException {
380      for (Cell cell : value.listCells()) {
381        reporter.getCounter(TestTableInputFormat.class.getName() + ":row",
382            Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()))
383            .increment(1l);
384        reporter.getCounter(TestTableInputFormat.class.getName() + ":family",
385            Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()))
386            .increment(1l);
387        reporter.getCounter(TestTableInputFormat.class.getName() + ":value",
388            Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()))
389            .increment(1l);
390      }
391    }
392
393    @Override
394    public void close() {
395    }
396
397  }
398
399  public static class ExampleDeprecatedTIF extends TableInputFormatBase implements JobConfigurable {
400
401    @Override
402    public void configure(JobConf job) {
403      try {
404        Connection connection = ConnectionFactory.createConnection(job);
405        Table exampleTable = connection.getTable(TableName.valueOf("exampleDeprecatedTable"));
406        // mandatory
407        initializeTable(connection, exampleTable.getName());
408        byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
409          Bytes.toBytes("columnB") };
410        // mandatory
411        setInputColumns(inputColumns);
412        Filter exampleFilter =
413          new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
414        // optional
415        setRowFilter(exampleFilter);
416      } catch (IOException exception) {
417        throw new RuntimeException("Failed to configure for job.", exception);
418      }
419    }
420
421  }
422
423  public static class ExampleJobConfigurableTIF extends ExampleTIF implements JobConfigurable {
424
425    @Override
426    public void configure(JobConf job) {
427      try {
428        initialize(job);
429      } catch (IOException exception) {
430        throw new RuntimeException("Failed to initialize.", exception);
431      }
432    }
433
434    @Override
435    protected void initialize(JobConf job) throws IOException {
436      initialize(job, "exampleJobConfigurableTable");
437    }
438  }
439
440
441  public static class ExampleTIF extends TableInputFormatBase {
442
443    @Override
444    protected void initialize(JobConf job) throws IOException {
445      initialize(job, "exampleTable");
446    }
447
448    protected void initialize(JobConf job, String table) throws IOException {
449      Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create(job));
450      TableName tableName = TableName.valueOf(table);
451      // mandatory
452      initializeTable(connection, tableName);
453      byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
454        Bytes.toBytes("columnB") };
455      // mandatory
456      setInputColumns(inputColumns);
457      Filter exampleFilter =
458        new RowFilter(CompareOperator.EQUAL, new RegexStringComparator("aa.*"));
459      // optional
460      setRowFilter(exampleFilter);
461    }
462
463  }
464
465}
466