001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.spark; 019 020import java.io.File; 021import java.io.IOException; 022import java.io.Serializable; 023import java.util.ArrayList; 024import java.util.Iterator; 025import java.util.HashMap; 026import java.util.List; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.Path; 029import org.apache.hadoop.hbase.Cell; 030import org.apache.hadoop.hbase.CellUtil; 031import org.apache.hadoop.hbase.HConstants; 032import org.apache.hadoop.hbase.HBaseClassTestRule; 033import org.apache.hadoop.hbase.HBaseTestingUtility; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.client.Admin; 036import org.apache.hadoop.hbase.client.Connection; 037import org.apache.hadoop.hbase.client.ConnectionFactory; 038import org.apache.hadoop.hbase.client.Delete; 039import org.apache.hadoop.hbase.client.Get; 040import org.apache.hadoop.hbase.client.Put; 041import org.apache.hadoop.hbase.client.Result; 042import org.apache.hadoop.hbase.client.Scan; 043import org.apache.hadoop.hbase.client.Table; 044import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 045import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles; 046import org.apache.hadoop.hbase.spark.example.hbasecontext.JavaHBaseBulkDeleteExample; 047import org.apache.hadoop.hbase.testclassification.MediumTests; 048import org.apache.hadoop.hbase.testclassification.MiscTests; 049import org.apache.hadoop.hbase.util.Bytes; 050 051import org.apache.hadoop.hbase.util.Pair; 052import org.apache.spark.api.java.JavaRDD; 053import org.apache.spark.api.java.JavaSparkContext; 054import org.apache.spark.api.java.function.Function; 055import org.junit.After; 056import org.junit.Assert; 057import org.junit.Before; 058import org.junit.ClassRule; 059import org.junit.Test; 060import org.junit.experimental.categories.Category; 061import org.slf4j.Logger; 062import org.slf4j.LoggerFactory; 063import scala.Tuple2; 064import org.apache.hbase.thirdparty.com.google.common.io.Files; 065 066@Category({MiscTests.class, MediumTests.class}) 067public class TestJavaHBaseContext implements Serializable { 068 069 @ClassRule 070 public static final HBaseClassTestRule TIMEOUT = 071 HBaseClassTestRule.forClass(TestJavaHBaseContext.class); 072 073 private transient JavaSparkContext jsc; 074 HBaseTestingUtility htu; 075 protected static final Logger LOG = LoggerFactory.getLogger(TestJavaHBaseContext.class); 076 077 078 079 byte[] tableName = Bytes.toBytes("t1"); 080 byte[] columnFamily = Bytes.toBytes("c"); 081 byte[] columnFamily1 = Bytes.toBytes("d"); 082 String columnFamilyStr = Bytes.toString(columnFamily); 083 String columnFamilyStr1 = Bytes.toString(columnFamily1); 084 085 086 @Before 087 public void setUp() { 088 jsc = new JavaSparkContext("local", "JavaHBaseContextSuite"); 089 090 File tempDir = Files.createTempDir(); 091 tempDir.deleteOnExit(); 092 093 htu = new HBaseTestingUtility(); 094 try { 095 LOG.info("cleaning up test dir"); 096 097 htu.cleanupTestDir(); 098 099 LOG.info("starting minicluster"); 100 101 htu.startMiniZKCluster(); 102 htu.startMiniHBaseCluster(1, 1); 103 104 LOG.info(" - minicluster started"); 105 106 try { 107 htu.deleteTable(TableName.valueOf(tableName)); 108 } catch (Exception e) { 109 LOG.info(" - no table " + Bytes.toString(tableName) + " found"); 110 } 111 112 LOG.info(" - creating table " + Bytes.toString(tableName)); 113 htu.createTable(TableName.valueOf(tableName), 114 new byte[][]{columnFamily, columnFamily1}); 115 LOG.info(" - created table"); 116 } catch (Exception e1) { 117 throw new RuntimeException(e1); 118 } 119 } 120 121 @After 122 public void tearDown() { 123 try { 124 htu.deleteTable(TableName.valueOf(tableName)); 125 LOG.info("shuting down minicluster"); 126 htu.shutdownMiniHBaseCluster(); 127 htu.shutdownMiniZKCluster(); 128 LOG.info(" - minicluster shut down"); 129 htu.cleanupTestDir(); 130 } catch (Exception e) { 131 throw new RuntimeException(e); 132 } 133 jsc.stop(); 134 jsc = null; 135 } 136 137 @Test 138 public void testBulkPut() throws IOException { 139 140 List<String> list = new ArrayList<>(5); 141 list.add("1," + columnFamilyStr + ",a,1"); 142 list.add("2," + columnFamilyStr + ",a,2"); 143 list.add("3," + columnFamilyStr + ",a,3"); 144 list.add("4," + columnFamilyStr + ",a,4"); 145 list.add("5," + columnFamilyStr + ",a,5"); 146 147 JavaRDD<String> rdd = jsc.parallelize(list); 148 149 Configuration conf = htu.getConfiguration(); 150 151 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 152 153 Connection conn = ConnectionFactory.createConnection(conf); 154 Table table = conn.getTable(TableName.valueOf(tableName)); 155 156 try { 157 List<Delete> deletes = new ArrayList<>(5); 158 for (int i = 1; i < 6; i++) { 159 deletes.add(new Delete(Bytes.toBytes(Integer.toString(i)))); 160 } 161 table.delete(deletes); 162 } finally { 163 table.close(); 164 } 165 166 hbaseContext.bulkPut(rdd, 167 TableName.valueOf(tableName), 168 new PutFunction()); 169 170 table = conn.getTable(TableName.valueOf(tableName)); 171 172 try { 173 Result result1 = table.get(new Get(Bytes.toBytes("1"))); 174 Assert.assertNotNull("Row 1 should had been deleted", result1.getRow()); 175 176 Result result2 = table.get(new Get(Bytes.toBytes("2"))); 177 Assert.assertNotNull("Row 2 should had been deleted", result2.getRow()); 178 179 Result result3 = table.get(new Get(Bytes.toBytes("3"))); 180 Assert.assertNotNull("Row 3 should had been deleted", result3.getRow()); 181 182 Result result4 = table.get(new Get(Bytes.toBytes("4"))); 183 Assert.assertNotNull("Row 4 should had been deleted", result4.getRow()); 184 185 Result result5 = table.get(new Get(Bytes.toBytes("5"))); 186 Assert.assertNotNull("Row 5 should had been deleted", result5.getRow()); 187 } finally { 188 table.close(); 189 conn.close(); 190 } 191 } 192 193 public static class PutFunction implements Function<String, Put> { 194 195 private static final long serialVersionUID = 1L; 196 197 public Put call(String v) throws Exception { 198 String[] cells = v.split(","); 199 Put put = new Put(Bytes.toBytes(cells[0])); 200 201 put.addColumn(Bytes.toBytes(cells[1]), Bytes.toBytes(cells[2]), 202 Bytes.toBytes(cells[3])); 203 return put; 204 } 205 } 206 207 @Test 208 public void testBulkDelete() throws IOException { 209 List<byte[]> list = new ArrayList<>(3); 210 list.add(Bytes.toBytes("1")); 211 list.add(Bytes.toBytes("2")); 212 list.add(Bytes.toBytes("3")); 213 214 JavaRDD<byte[]> rdd = jsc.parallelize(list); 215 216 Configuration conf = htu.getConfiguration(); 217 218 populateTableWithMockData(conf, TableName.valueOf(tableName)); 219 220 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 221 222 hbaseContext.bulkDelete(rdd, TableName.valueOf(tableName), 223 new JavaHBaseBulkDeleteExample.DeleteFunction(), 2); 224 225 226 227 try ( 228 Connection conn = ConnectionFactory.createConnection(conf); 229 Table table = conn.getTable(TableName.valueOf(tableName)) 230 ){ 231 Result result1 = table.get(new Get(Bytes.toBytes("1"))); 232 Assert.assertNull("Row 1 should had been deleted", result1.getRow()); 233 234 Result result2 = table.get(new Get(Bytes.toBytes("2"))); 235 Assert.assertNull("Row 2 should had been deleted", result2.getRow()); 236 237 Result result3 = table.get(new Get(Bytes.toBytes("3"))); 238 Assert.assertNull("Row 3 should had been deleted", result3.getRow()); 239 240 Result result4 = table.get(new Get(Bytes.toBytes("4"))); 241 Assert.assertNotNull("Row 4 should had been deleted", result4.getRow()); 242 243 Result result5 = table.get(new Get(Bytes.toBytes("5"))); 244 Assert.assertNotNull("Row 5 should had been deleted", result5.getRow()); 245 } 246 } 247 248 @Test 249 public void testDistributedScan() throws IOException { 250 Configuration conf = htu.getConfiguration(); 251 252 populateTableWithMockData(conf, TableName.valueOf(tableName)); 253 254 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 255 256 Scan scan = new Scan(); 257 scan.setCaching(100); 258 259 JavaRDD<String> javaRdd = 260 hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan) 261 .map(new ScanConvertFunction()); 262 263 List<String> results = javaRdd.collect(); 264 265 Assert.assertEquals(results.size(), 5); 266 } 267 268 private static class ScanConvertFunction implements 269 Function<Tuple2<ImmutableBytesWritable, Result>, String> { 270 @Override 271 public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception { 272 return Bytes.toString(v1._1().copyBytes()); 273 } 274 } 275 276 @Test 277 public void testBulkGet() throws IOException { 278 List<byte[]> list = new ArrayList<>(5); 279 list.add(Bytes.toBytes("1")); 280 list.add(Bytes.toBytes("2")); 281 list.add(Bytes.toBytes("3")); 282 list.add(Bytes.toBytes("4")); 283 list.add(Bytes.toBytes("5")); 284 285 JavaRDD<byte[]> rdd = jsc.parallelize(list); 286 287 Configuration conf = htu.getConfiguration(); 288 289 populateTableWithMockData(conf, TableName.valueOf(tableName)); 290 291 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 292 293 final JavaRDD<String> stringJavaRDD = 294 hbaseContext.bulkGet(TableName.valueOf(tableName), 2, rdd, 295 new GetFunction(), 296 new ResultFunction()); 297 298 Assert.assertEquals(stringJavaRDD.count(), 5); 299 } 300 301 @Test 302 public void testBulkLoad() throws Exception { 303 304 Path output = htu.getDataTestDir("testBulkLoad"); 305 // Add cell as String: "row,falmily,qualifier,value" 306 List<String> list= new ArrayList<String>(); 307 // row1 308 list.add("1," + columnFamilyStr + ",b,1"); 309 // row3 310 list.add("3," + columnFamilyStr + ",a,2"); 311 list.add("3," + columnFamilyStr + ",b,1"); 312 list.add("3," + columnFamilyStr1 + ",a,1"); 313 //row2 314 list.add("2," + columnFamilyStr + ",a,3"); 315 list.add("2," + columnFamilyStr + ",b,3"); 316 317 JavaRDD<String> rdd = jsc.parallelize(list); 318 319 Configuration conf = htu.getConfiguration(); 320 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 321 322 323 324 hbaseContext.bulkLoad(rdd, TableName.valueOf(tableName), new BulkLoadFunction(), output.toUri().getPath(), 325 new HashMap<byte[], FamilyHFileWriteOptions>(), false, HConstants.DEFAULT_MAX_FILE_SIZE); 326 327 try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) { 328 Table table = conn.getTable(TableName.valueOf(tableName)); 329 // Do bulk load 330 LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf); 331 load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName))); 332 333 334 335 // Check row1 336 List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells(); 337 Assert.assertEquals(cell1.size(), 1); 338 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr); 339 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b"); 340 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1"); 341 342 // Check row3 343 List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells(); 344 Assert.assertEquals(cell3.size(), 3); 345 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr); 346 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a"); 347 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2"); 348 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr); 349 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b"); 350 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1"); 351 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1); 352 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a"); 353 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1"); 354 355 // Check row2 356 List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells(); 357 Assert.assertEquals(cell2.size(), 2); 358 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr); 359 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a"); 360 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3"); 361 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr); 362 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b"); 363 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3"); 364 } 365 } 366 367 @Test 368 public void testBulkLoadThinRows() throws Exception { 369 Path output = htu.getDataTestDir("testBulkLoadThinRows"); 370 // because of the limitation of scala bulkLoadThinRows API 371 // we need to provide data as <row, all cells in that row> 372 List<List<String>> list= new ArrayList<List<String>>(); 373 // row1 374 List<String> list1 = new ArrayList<String>(); 375 list1.add("1," + columnFamilyStr + ",b,1"); 376 list.add(list1); 377 // row3 378 List<String> list3 = new ArrayList<String>(); 379 list3.add("3," + columnFamilyStr + ",a,2"); 380 list3.add("3," + columnFamilyStr + ",b,1"); 381 list3.add("3," + columnFamilyStr1 + ",a,1"); 382 list.add(list3); 383 //row2 384 List<String> list2 = new ArrayList<String>(); 385 list2.add("2," + columnFamilyStr + ",a,3"); 386 list2.add("2," + columnFamilyStr + ",b,3"); 387 list.add(list2); 388 389 JavaRDD<List<String>> rdd = jsc.parallelize(list); 390 391 Configuration conf = htu.getConfiguration(); 392 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 393 394 hbaseContext.bulkLoadThinRows(rdd, TableName.valueOf(tableName), new BulkLoadThinRowsFunction(), output.toString(), 395 new HashMap<byte[], FamilyHFileWriteOptions>(), false, HConstants.DEFAULT_MAX_FILE_SIZE); 396 397 398 try (Connection conn = ConnectionFactory.createConnection(conf); Admin admin = conn.getAdmin()) { 399 Table table = conn.getTable(TableName.valueOf(tableName)); 400 // Do bulk load 401 LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf); 402 load.doBulkLoad(output, admin, table, conn.getRegionLocator(TableName.valueOf(tableName))); 403 404 // Check row1 405 List<Cell> cell1 = table.get(new Get(Bytes.toBytes("1"))).listCells(); 406 Assert.assertEquals(cell1.size(), 1); 407 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell1.get(0))), columnFamilyStr); 408 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell1.get(0))), "b"); 409 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell1.get(0))), "1"); 410 411 // Check row3 412 List<Cell> cell3 = table.get(new Get(Bytes.toBytes("3"))).listCells(); 413 Assert.assertEquals(cell3.size(), 3); 414 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(0))), columnFamilyStr); 415 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(0))), "a"); 416 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(0))), "2"); 417 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(1))), columnFamilyStr); 418 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(1))), "b"); 419 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(1))), "1"); 420 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell3.get(2))), columnFamilyStr1); 421 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell3.get(2))), "a"); 422 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell3.get(2))), "1"); 423 424 // Check row2 425 List<Cell> cell2 = table.get(new Get(Bytes.toBytes("2"))).listCells(); 426 Assert.assertEquals(cell2.size(), 2); 427 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(0))), columnFamilyStr); 428 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(0))), "a"); 429 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(0))), "3"); 430 Assert.assertEquals(Bytes.toString(CellUtil.cloneFamily(cell2.get(1))), columnFamilyStr); 431 Assert.assertEquals(Bytes.toString(CellUtil.cloneQualifier(cell2.get(1))), "b"); 432 Assert.assertEquals(Bytes.toString(CellUtil.cloneValue(cell2.get(1))), "3"); 433 } 434 435 } 436 public static class BulkLoadFunction implements Function<String, Pair<KeyFamilyQualifier, byte[]>> { 437 438 @Override public Pair<KeyFamilyQualifier, byte[]> call(String v1) throws Exception { 439 if (v1 == null) 440 return null; 441 String[] strs = v1.split(","); 442 if(strs.length != 4) 443 return null; 444 KeyFamilyQualifier kfq = new KeyFamilyQualifier(Bytes.toBytes(strs[0]), Bytes.toBytes(strs[1]), 445 Bytes.toBytes(strs[2])); 446 return new Pair(kfq, Bytes.toBytes(strs[3])); 447 } 448 } 449 450 public static class BulkLoadThinRowsFunction implements Function<List<String>, Pair<ByteArrayWrapper, FamiliesQualifiersValues>> { 451 452 @Override public Pair<ByteArrayWrapper, FamiliesQualifiersValues> call(List<String> list) throws Exception { 453 if (list == null) 454 return null; 455 ByteArrayWrapper rowKey = null; 456 FamiliesQualifiersValues fqv = new FamiliesQualifiersValues(); 457 for (String cell : list) { 458 String[] strs = cell.split(","); 459 if (rowKey == null) { 460 rowKey = new ByteArrayWrapper(Bytes.toBytes(strs[0])); 461 } 462 fqv.add(Bytes.toBytes(strs[1]), Bytes.toBytes(strs[2]), Bytes.toBytes(strs[3])); 463 } 464 return new Pair(rowKey, fqv); 465 } 466 } 467 468 public static class GetFunction implements Function<byte[], Get> { 469 470 private static final long serialVersionUID = 1L; 471 472 public Get call(byte[] v) throws Exception { 473 return new Get(v); 474 } 475 } 476 477 public static class ResultFunction implements Function<Result, String> { 478 479 private static final long serialVersionUID = 1L; 480 481 public String call(Result result) throws Exception { 482 Iterator<Cell> it = result.listCells().iterator(); 483 StringBuilder b = new StringBuilder(); 484 485 b.append(Bytes.toString(result.getRow())).append(":"); 486 487 while (it.hasNext()) { 488 Cell cell = it.next(); 489 String q = Bytes.toString(CellUtil.cloneQualifier(cell)); 490 if ("counter".equals(q)) { 491 b.append("(") 492 .append(q) 493 .append(",") 494 .append(Bytes.toLong(CellUtil.cloneValue(cell))) 495 .append(")"); 496 } else { 497 b.append("(") 498 .append(q) 499 .append(",") 500 .append(Bytes.toString(CellUtil.cloneValue(cell))) 501 .append(")"); 502 } 503 } 504 return b.toString(); 505 } 506 } 507 508 private void populateTableWithMockData(Configuration conf, TableName tableName) 509 throws IOException { 510 try ( 511 Connection conn = ConnectionFactory.createConnection(conf); 512 Table table = conn.getTable(tableName)) { 513 514 List<Put> puts = new ArrayList<>(5); 515 516 for (int i = 1; i < 6; i++) { 517 Put put = new Put(Bytes.toBytes(Integer.toString(i))); 518 put.addColumn(columnFamily, columnFamily, columnFamily); 519 puts.add(put); 520 } 521 table.put(puts); 522 } 523 } 524 525}