001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.spark;
019
020import java.io.File;
021import java.io.IOException;
022import java.io.Serializable;
023import java.util.ArrayList;
024import java.util.Iterator;
025import java.util.HashMap;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellUtil;
031import org.apache.hadoop.hbase.HConstants;
032import org.apache.hadoop.hbase.HBaseClassTestRule;
033import org.apache.hadoop.hbase.HBaseTestingUtility;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.Admin;
036import org.apache.hadoop.hbase.client.Connection;
037import org.apache.hadoop.hbase.client.ConnectionFactory;
038import org.apache.hadoop.hbase.client.Delete;
039import org.apache.hadoop.hbase.client.Get;
040import org.apache.hadoop.hbase.client.Put;
041import org.apache.hadoop.hbase.client.Result;
042import org.apache.hadoop.hbase.client.Scan;
043import org.apache.hadoop.hbase.client.Table;
044import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
045import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
046import org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkDeleteExample;
047import org.apache.hadoop.hbase.testclassification.MediumTests;
048import org.apache.hadoop.hbase.testclassification.MiscTests;
049import org.apache.hadoop.hbase.util.Bytes;
050
051import org.apache.hadoop.hbase.util.Pair;
052import org.apache.spark.api.java.JavaRDD;
053import org.apache.spark.api.java.JavaSparkContext;
054import org.apache.spark.api.java.function.Function;
055import org.junit.After;
056import org.junit.Assert;
057import org.junit.Before;
058import org.junit.ClassRule;
059import org.junit.Test;
060import org.junit.experimental.categories.Category;
061import org.slf4j.Logger;
062import org.slf4j.LoggerFactory;
063import scala.Tuple2;
064import org.apache.hbase.thirdparty.com.google.common.io.Files;
065
066@Category({MiscTests.class, MediumTests.class})
067public class TestJavaHBaseContext implements Serializable {
068
069  @ClassRule
070  public static final HBaseClassTestRule TIMEOUT =
071      HBaseClassTestRule.forClass(TestJavaHBaseContext.class);
072
073  private transient JavaSparkContext jsc;
074  HBaseTestingUtility htu;
075  protected static final Logger LOG = LoggerFactory.getLogger(TestJavaHBaseContext.class);
076
077
078
079  byte[] tableName = Bytes.toBytes("t1");
080  byte[] columnFamily = Bytes.toBytes("c");
081  byte[] columnFamily1 = Bytes.toBytes("d");
082  String columnFamilyStr = Bytes.toString(columnFamily);
083  String columnFamilyStr1 = Bytes.toString(columnFamily1);
084
085
086  @Before
087  public void setUp() {
088    jsc = new JavaSparkContext("local", "JavaHBaseContextSuite");
089
090    File tempDir = Files.createTempDir();
091    tempDir.deleteOnExit();
092
093    htu = new HBaseTestingUtility();
094    try {
095      LOG.info("cleaning up test dir");
096
097      htu.cleanupTestDir();
098
099      LOG.info("starting minicluster");
100
101      htu.startMiniZKCluster();
102      htu.startMiniHBaseCluster(1, 1);
103
104      LOG.info(" - minicluster started");
105
106      try {
107        htu.deleteTable(TableName.valueOf(tableName));
108      } catch (Exception e) {
109        LOG.info(" - no table " + Bytes.toString(tableName) + " found");
110      }
111
112      LOG.info(" - creating table " + Bytes.toString(tableName));
113      htu.createTable(TableName.valueOf(tableName),
114          new byte[][]{columnFamily, columnFamily1});
115      LOG.info(" - created table");
116    } catch (Exception e1) {
117      throw new RuntimeException(e1);
118    }
119  }
120
121  @After
122  public void tearDown() {
123    try {
124      htu.deleteTable(TableName.valueOf(tableName));
125      LOG.info("shuting down minicluster");
126      htu.shutdownMiniHBaseCluster();
127      htu.shutdownMiniZKCluster();
128      LOG.info(" - minicluster shut down");
129      htu.cleanupTestDir();
130    } catch (Exception e) {
131      throw new RuntimeException(e);
132    }
133    jsc.stop();
134    jsc = null;
135  }
136
137  @Test
138  public void testBulkPut() throws IOException {
139
140    List<String> list = new ArrayList<>(5);
141    list.add("1," + columnFamilyStr + ",a,1");
142    list.add("2," + columnFamilyStr + ",a,2");
143    list.add("3," + columnFamilyStr + ",a,3");
144    list.add("4," + columnFamilyStr + ",a,4");
145    list.add("5," + columnFamilyStr + ",a,5");
146
147    JavaRDD<String> rdd = jsc.parallelize(list);
148
149    Configuration conf = htu.getConfiguration();
150
151    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
152
153    Connection conn = ConnectionFactory.createConnection(conf);
154    Table table = conn.getTable(TableName.valueOf(tableName));
155
156    try {
157      List<Delete> deletes = new ArrayList<>(5);
158      for (int i = 1; i < 6; i++) {
159        deletes.add(new Delete(Bytes.toBytes(Integer.toString(i))));
160      }
161      table.delete(deletes);
162    } finally {
163      table.close();
164    }
165
166    hbaseContext.bulkPut(rdd,
167            TableName.valueOf(tableName),
168            new PutFunction());
169
170    table = conn.getTable(TableName.valueOf(tableName));
171
172    try {
173      Result result1 = table.get(new Get(Bytes.toBytes("1")));
174      Assert.assertNotNull("Row 1 should had been deleted", result1.getRow());
175
176      Result result2 = table.get(new Get(Bytes.toBytes("2")));
177      Assert.assertNotNull("Row 2 should had been deleted", result2.getRow());
178
179      Result result3 = table.get(new Get(Bytes.toBytes("3")));
180      Assert.assertNotNull("Row 3 should had been deleted", result3.getRow());
181
182      Result result4 = table.get(new Get(Bytes.toBytes("4")));
183      Assert.assertNotNull("Row 4 should had been deleted", result4.getRow());
184
185      Result result5 = table.get(new Get(Bytes.toBytes("5")));
186      Assert.assertNotNull("Row 5 should had been deleted", result5.getRow());
187    } finally {
188      table.close();
189      conn.close();
190    }
191  }
192
193  public static class PutFunction implements Function<String, Put> {
194
195    private static final long serialVersionUID = 1L;
196
197    public Put call(String v) throws Exception {
198      String[] cells = v.split(",");
199      Put put = new Put(Bytes.toBytes(cells[0]));
200
201      put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]),
202              Bytes.toBytes(cells[3]));
203      return put;
204    }
205  }
206
207  @Test
208  public void testBulkDelete() throws IOException {
209    List<byte[]> list = new ArrayList<>(3);
210    list.add(Bytes.toBytes("1"));
211    list.add(Bytes.toBytes("2"));
212    list.add(Bytes.toBytes("3"));
213
214    JavaRDD<byte[]> rdd = jsc.parallelize(list);
215
216    Configuration conf = htu.getConfiguration();
217
218    populateTableWithMockData(conf, TableName.valueOf(tableName));
219
220    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
221
222    hbaseContext.bulkDelete(rdd, TableName.valueOf(tableName),
223            new JavaHBaseBulkDeleteExample.DeleteFunction(), 2);
224
225
226
227    try (
228            Connection conn = ConnectionFactory.createConnection(conf);
229            Table table = conn.getTable(TableName.valueOf(tableName))
230    ){
231      Result result1 = table.get(new Get(Bytes.toBytes("1")));
232      Assert.assertNull("Row 1 should had been deleted", result1.getRow());
233
234      Result result2 = table.get(new Get(Bytes.toBytes("2")));
235      Assert.assertNull("Row 2 should had been deleted", result2.getRow());
236
237      Result result3 = table.get(new Get(Bytes.toBytes("3")));
238      Assert.assertNull("Row 3 should had been deleted", result3.getRow());
239
240      Result result4 = table.get(new Get(Bytes.toBytes("4")));
241      Assert.assertNotNull("Row 4 should had been deleted", result4.getRow());
242
243      Result result5 = table.get(new Get(Bytes.toBytes("5")));
244      Assert.assertNotNull("Row 5 should had been deleted", result5.getRow());
245    }
246  }
247
248  @Test
249  public void testDistributedScan() throws IOException {
250    Configuration conf = htu.getConfiguration();
251
252    populateTableWithMockData(conf, TableName.valueOf(tableName));
253
254    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
255
256    Scan scan = new Scan();
257    scan.setCaching(100);
258
259    JavaRDD<String> javaRdd =
260            hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan)
261                    .map(new ScanConvertFunction());
262
263    List<String> results = javaRdd.collect();
264
265    Assert.assertEquals(results.size(), 5);
266  }
267
268  private static class ScanConvertFunction implements
269          Function<Tuple2<ImmutableBytesWritable, Result>, String> {
270    @Override
271    public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception {
272      return Bytes.toString(v1._1().copyBytes());
273    }
274  }
275
276  @Test
277  public void testBulkGet() throws IOException {
278    List<byte[]> list = new ArrayList<>(5);
279    list.add(Bytes.toBytes("1"));
280    list.add(Bytes.toBytes("2"));
281    list.add(Bytes.toBytes("3"));
282    list.add(Bytes.toBytes("4"));
283    list.add(Bytes.toBytes("5"));
284
285    JavaRDD<byte[]> rdd = jsc.parallelize(list);
286
287    Configuration conf = htu.getConfiguration();
288
289    populateTableWithMockData(conf, TableName.valueOf(tableName));
290
291    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
292
293    final JavaRDD<String> stringJavaRDD =
294            hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd,
295            new GetFunction(),
296            new ResultFunction());
297
298    Assert.assertEquals(stringJavaRDD.count(), 5);
299  }
300
301  @Test
302  public void testBulkLoad() throws Exception {
303
304    Path output = htu.getDataTestDir("testBulkLoad");
305    // Add cell as String: "row,falmily,qualifier,value"
306    List<String> list= new ArrayList<String>();
307    // row1
308    list.add("1," + columnFamilyStr + ",b,1");
309    // row3
310    list.add("3," + columnFamilyStr + ",a,2");
311    list.add("3," + columnFamilyStr + ",b,1");
312    list.add("3," + columnFamilyStr1 + ",a,1");
313    //row2
314    list.add("2," + columnFamilyStr + ",a,3");
315    list.add("2," + columnFamilyStr + ",b,3");
316
317    JavaRDD<String> rdd = jsc.parallelize(list);
318
319    Configuration conf = htu.getConfiguration();
320    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
321
322
323
324    hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(), output.toUri().getPath(),
325        new HashMap<byte[], FamilyHFileWriteOptions>(), false, HConstants.DEFAULT_MAX_FILE_SIZE);
326
327    try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) {
328      Table table = conn.getTable(TableName.valueOf(tableName));
329      // Do bulk load
330      LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
331      load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName)));
332
333
334
335      // Check row1
336      List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells();
337      Assert.assertEquals(cell1.size(), 1);
338      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr);
339      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b");
340      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1");
341
342      // Check row3
343      List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells();
344      Assert.assertEquals(cell3.size(), 3);
345      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr);
346      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a");
347      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2");
348      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr);
349      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b");
350      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1");
351      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1);
352      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a");
353      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1");
354
355      // Check row2
356      List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells();
357      Assert.assertEquals(cell2.size(), 2);
358      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr);
359      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a");
360      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3");
361      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr);
362      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b");
363      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3");
364    }
365  }
366
367  @Test
368  public void testBulkLoadThinRows() throws Exception {
369    Path output = htu.getDataTestDir("testBulkLoadThinRows");
370    // because of the limitation of scala bulkLoadThinRows API
371    // we need to provide data as <row, all cells in that row>
372    List<List<String>> list= new ArrayList<List<String>>();
373    // row1
374    List<String> list1 = new ArrayList<String>();
375    list1.add("1," + columnFamilyStr + ",b,1");
376    list.add(list1);
377    // row3
378    List<String> list3 = new ArrayList<String>();
379    list3.add("3," + columnFamilyStr + ",a,2");
380    list3.add("3," + columnFamilyStr + ",b,1");
381    list3.add("3," + columnFamilyStr1 + ",a,1");
382    list.add(list3);
383    //row2
384    List<String> list2 = new ArrayList<String>();
385    list2.add("2," + columnFamilyStr + ",a,3");
386    list2.add("2," + columnFamilyStr + ",b,3");
387    list.add(list2);
388
389    JavaRDD<List<String>> rdd = jsc.parallelize(list);
390
391    Configuration conf = htu.getConfiguration();
392    JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
393
394    hbaseContext.bulkLoadThinRows(rdd, TableName.valueOf(tableName), new BulkLoadThinRowsFunction(), output.toString(),
395        new HashMap<byte[], FamilyHFileWriteOptions>(), false, HConstants.DEFAULT_MAX_FILE_SIZE);
396
397
398    try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) {
399      Table table = conn.getTable(TableName.valueOf(tableName));
400      // Do bulk load
401      LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
402      load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName)));
403
404      // Check row1
405      List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells();
406      Assert.assertEquals(cell1.size(), 1);
407      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr);
408      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b");
409      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1");
410
411      // Check row3
412      List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells();
413      Assert.assertEquals(cell3.size(), 3);
414      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr);
415      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a");
416      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2");
417      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr);
418      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b");
419      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1");
420      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1);
421      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a");
422      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1");
423
424      // Check row2
425      List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells();
426      Assert.assertEquals(cell2.size(), 2);
427      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr);
428      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a");
429      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3");
430      Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr);
431      Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b");
432      Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3");
433    }
434
435  }
436  public static class BulkLoadFunction implements Function<String, Pair<KeyFamilyQualifier, byte[]>> {
437
438    @Override public Pair<KeyFamilyQualifier, byte[]> call(String v1) throws Exception {
439      if (v1 == null)
440        return null;
441      String[] strs = v1.split(",");
442      if(strs.length != 4)
443        return null;
444      KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]), Bytes.toBytes(strs[1]),
445          Bytes.toBytes(strs[2]));
446      return new Pair(kfq, Bytes.toBytes(strs[3]));
447    }
448  }
449
450  public static class BulkLoadThinRowsFunction implements Function<List<String>, Pair<ByteArrayWrapper, FamiliesQualifiersValues>> {
451
452    @Override public Pair<ByteArrayWrapper, FamiliesQualifiersValues> call(List<String> list) throws Exception {
453      if (list == null)
454        return null;
455      ByteArrayWrapper rowKey = null;
456      FamiliesQualifiersValues fqv = new FamiliesQualifiersValues();
457      for (String cell : list) {
458        String[] strs = cell.split(",");
459        if (rowKey == null) {
460          rowKey = new ByteArrayWrapper(Bytes.toBytes(strs[0]));
461        }
462        fqv.add(Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2]), Bytes.toBytes(strs[3]));
463      }
464      return new Pair(rowKey, fqv);
465    }
466  }
467
468  public static class GetFunction implements Function<byte[], Get> {
469
470    private static final long serialVersionUID = 1L;
471
472    public Get call(byte[] v) throws Exception {
473      return new Get(v);
474    }
475  }
476
477  public static class ResultFunction implements Function<Result, String> {
478
479    private static final long serialVersionUID = 1L;
480
481    public String call(Result result) throws Exception {
482      Iterator<Cell> it = result.listCells().iterator();
483      StringBuilder b = new StringBuilder();
484
485      b.append(Bytes.toString(result.getRow())).append(":");
486
487      while (it.hasNext()) {
488        Cell cell = it.next();
489        String q = Bytes.toString(CellUtil.cloneQualifier(cell));
490        if ("counter".equals(q)) {
491          b.append("(")
492                  .append(q)
493                  .append(",")
494                  .append(Bytes.toLong(CellUtil.cloneValue(cell)))
495                  .append(")");
496        } else {
497          b.append("(")
498                  .append(q)
499                  .append(",")
500                  .append(Bytes.toString(CellUtil.cloneValue(cell)))
501                  .append(")");
502        }
503      }
504      return b.toString();
505    }
506  }
507
508  private void populateTableWithMockData(Configuration conf, TableName tableName)
509          throws IOException {
510    try (
511      Connection conn = ConnectionFactory.createConnection(conf);
512      Table table = conn.getTable(tableName)) {
513
514      List<Put> puts = new ArrayList<>(5);
515
516      for (int i = 1; i < 6; i++) {
517        Put put = new Put(Bytes.toBytes(Integer.toString(i)));
518        put.addColumn(columnFamily, columnFamily, columnFamily);
519        puts.add(put);
520      }
521      table.put(puts);
522    }
523  }
524
525}