001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.hadoop.hbase.spark.example.hbasecontext; 018 019import java.util.List; 020 021import org.apache.hadoop.conf.Configuration; 022import org.apache.hadoop.hbase.HBaseConfiguration; 023import org.apache.hadoop.hbase.TableName; 024import org.apache.hadoop.hbase.client.Result; 025import org.apache.hadoop.hbase.client.Scan; 026import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 027import org.apache.hadoop.hbase.spark.JavaHBaseContext; 028import org.apache.hadoop.hbase.util.Bytes; 029import org.apache.spark.SparkConf; 030import org.apache.spark.api.java.JavaRDD; 031import org.apache.spark.api.java.JavaSparkContext; 032 033import org.apache.spark.api.java.function.Function; 034import scala.Tuple2; 035 036/** 037 * This is a simple example of scanning records from HBase 038 * with the hbaseRDD function. 039 */ 040final public class JavaHBaseDistributedScan { 041 042 private JavaHBaseDistributedScan() {} 043 044 public static void main(String[] args) { 045 if (args.length < 1) { 046 System.out.println("JavaHBaseDistributedScan {tableName}"); 047 return; 048 } 049 050 String tableName = args[0]; 051 052 SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseDistributedScan " + tableName); 053 JavaSparkContext jsc = new JavaSparkContext(sparkConf); 054 055 try { 056 Configuration conf = HBaseConfiguration.create(); 057 058 JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf); 059 060 Scan scan = new Scan(); 061 scan.setCaching(100); 062 063 JavaRDD<Tuple2<ImmutableBytesWritable, Result>> javaRdd = 064 hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan); 065 066 List<String> results = javaRdd.map(new ScanConvertFunction()).collect(); 067 068 System.out.println("Result Size: " + results.size()); 069 } finally { 070 jsc.stop(); 071 } 072 } 073 074 private static class ScanConvertFunction implements 075 Function<Tuple2<ImmutableBytesWritable, Result>, String> { 076 @Override 077 public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception { 078 return Bytes.toString(v1._1().copyBytes()); 079 } 080 } 081}