001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.hadoop.hbase.spark.example.hbasecontext;
018
019import java.util.List;
020
021import org.apache.hadoop.conf.Configuration;
022import org.apache.hadoop.hbase.HBaseConfiguration;
023import org.apache.hadoop.hbase.TableName;
024import org.apache.hadoop.hbase.client.Result;
025import org.apache.hadoop.hbase.client.Scan;
026import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
027import org.apache.hadoop.hbase.spark.JavaHBaseContext;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.spark.SparkConf;
030import org.apache.spark.api.java.JavaRDD;
031import org.apache.spark.api.java.JavaSparkContext;
032
033import org.apache.spark.api.java.function.Function;
034import scala.Tuple2;
035
036/**
037 * This is a simple example of scanning records from HBase
038 * with the hbaseRDD function.
039 */
040final public class JavaHBaseDistributedScan {
041
042  private JavaHBaseDistributedScan() {}
043
044  public static void main(String[] args) {
045    if (args.length < 1) {
046      System.out.println("JavaHBaseDistributedScan {tableName}");
047      return;
048    }
049
050    String tableName = args[0];
051
052    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseDistributedScan " + tableName);
053    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
054
055    try {
056      Configuration conf = HBaseConfiguration.create();
057
058      JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);
059
060      Scan scan = new Scan();
061      scan.setCaching(100);
062
063      JavaRDD<Tuple2<ImmutableBytesWritable, Result>> javaRdd =
064              hbaseContext.hbaseRDD(TableName.valueOf(tableName), scan);
065
066      List<String> results = javaRdd.map(new ScanConvertFunction()).collect();
067
068      System.out.println("Result Size: " + results.size());
069    } finally {
070      jsc.stop();
071    }
072  }
073
074  private static class ScanConvertFunction implements
075          Function<Tuple2<ImmutableBytesWritable, Result>, String> {
076    @Override
077    public String call(Tuple2<ImmutableBytesWritable, Result> v1) throws Exception {
078      return Bytes.toString(v1._1().copyBytes());
079    }
080  }
081}