/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapred;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
import org.apache.hadoop.hbase.util.RegionSplitter;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;

/**
 * An {@code org.apache.hadoop.mapred} {@link InputFormat} that lets a MapReduce job run over a
 * table snapshot. Every operation is forwarded to {@link TableSnapshotInputFormatImpl}; more
 * detailed documentation lives on
 * {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
 *
 * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
 */
@InterfaceAudience.Public
public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {

  /**
   * {@link InputSplit} wrapper around a {@link TableSnapshotInputFormatImpl.InputSplit}. Length,
   * locations and serialization are all answered by the wrapped split.
   */
  public static class TableSnapshotRegionSplit implements InputSplit {
    private TableSnapshotInputFormatImpl.InputSplit delegate;

    /**
     * No-argument constructor for the mapreduce framework / Writable; wraps an empty
     * implementation split whose state is expected to be filled in by
     * {@link #readFields(DataInput)}.
     */
    public TableSnapshotRegionSplit() {
      this(new TableSnapshotInputFormatImpl.InputSplit());
    }

    /**
     * Wraps an already constructed implementation split.
     * @param delegate the split that every call is forwarded to
     */
    public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
      this.delegate = delegate;
    }

    /**
     * Builds the underlying implementation split from its parts and wraps it. All arguments are
     * handed unchanged to the {@link TableSnapshotInputFormatImpl.InputSplit} constructor.
     * @param htd table descriptor passed to the implementation split
     * @param regionInfo region passed to the implementation split
     * @param locations locations passed to the implementation split
     * @param scan scan passed to the implementation split
     * @param restoreDir restore directory passed to the implementation split
     */
    public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
        List<String> locations, Scan scan, Path restoreDir) {
      this(new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan,
          restoreDir));
    }

    /** @return the length reported by the wrapped split */
    @Override
    public long getLength() throws IOException {
      return delegate.getLength();
    }

    /** @return the locations reported by the wrapped split */
    @Override
    public String[] getLocations() throws IOException {
      return delegate.getLocations();
    }

    /** Serializes the wrapped split to {@code out}. */
    @Override
    public void write(DataOutput out) throws IOException {
      delegate.write(out);
    }

    /** Restores the wrapped split's state from {@code in}. */
    @Override
    public void readFields(DataInput in) throws IOException {
      delegate.readFields(in);
    }
  }

  /**
   * {@link RecordReader} adapter over a {@link TableSnapshotInputFormatImpl.RecordReader}.
   */
  static class TableSnapshotRecordReader
      implements RecordReader<ImmutableBytesWritable, Result> {

    private TableSnapshotInputFormatImpl.RecordReader delegate;

    /**
     * Creates the implementation reader and initializes it with the split's wrapped
     * implementation split and the job configuration.
     * @param split the split to read; its wrapped implementation split is used
     * @param job the job configuration handed to the implementation reader
     * @throws IOException if initializing the implementation reader fails
     */
    public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
        throws IOException {
      this.delegate = new TableSnapshotInputFormatImpl.RecordReader();
      this.delegate.initialize(split.delegate, job);
    }

    /**
     * Advances the underlying reader and, when it produced an entry, copies that entry into
     * the caller-supplied {@code key} and {@code value}.
     * @param key receives the bytes, offset and length of the current key
     * @param value receives a copy of the current result
     * @return {@code true} if an entry was read, {@code false} if the reader had nothing more
     */
    @Override
    public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
      boolean advanced = delegate.nextKeyValue();
      if (advanced) {
        ImmutableBytesWritable source = delegate.getCurrentKey();
        key.set(source.get(), source.getOffset(), source.getLength());
        value.copyFrom(delegate.getCurrentValue());
      }
      return advanced;
    }

    /** @return a new, empty key holder */
    @Override
    public ImmutableBytesWritable createKey() {
      return new ImmutableBytesWritable();
    }

    /** @return a new, empty value holder */
    @Override
    public Result createValue() {
      return new Result();
    }

    /** @return the position reported by the underlying reader */
    @Override
    public long getPos() throws IOException {
      return delegate.getPos();
    }

    /** Closes the underlying reader. */
    @Override
    public void close() throws IOException {
      delegate.close();
    }

    /** @return the progress reported by the underlying reader */
    @Override
    public float getProgress() throws IOException {
      return delegate.getProgress();
    }
  }

  /**
   * Asks {@link TableSnapshotInputFormatImpl#getSplits} for the job's splits and wraps each one
   * in a {@link TableSnapshotRegionSplit}, preserving order.
   * @param job the job configuration
   * @param numSplits not consulted by this implementation
   * @return one wrapper per implementation split
   * @throws IOException if the implementation fails to compute the splits
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<TableSnapshotInputFormatImpl.InputSplit> implSplits =
      TableSnapshotInputFormatImpl.getSplits(job);
    InputSplit[] wrapped = new InputSplit[implSplits.size()];
    int idx = 0;
    for (TableSnapshotInputFormatImpl.InputSplit implSplit : implSplits) {
      wrapped[idx++] = new TableSnapshotRegionSplit(implSplit);
    }
    return wrapped;
  }

  /**
   * Creates a reader for the given split.
   * @param split the split to read; it is cast to {@link TableSnapshotRegionSplit}
   * @param job the job configuration
   * @param reporter not used by this implementation
   * @return a record reader over the split
   * @throws IOException if the reader cannot be initialized
   */
  @Override
  public RecordReader<ImmutableBytesWritable, Result>
  getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
    TableSnapshotRegionSplit regionSplit = (TableSnapshotRegionSplit) split;
    return new TableSnapshotRecordReader(regionSplit, job);
  }

  /**
   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
   * @param job the job to configure
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. The current user
   *   should have write permissions to this directory, and it should not be a subdirectory of
   *   rootdir. After the job is finished, restoreDir can be deleted.
   * @throws IOException if an error occurs
   */
  public static void setInput(JobConf job, String snapshotName, Path restoreDir)
      throws IOException {
    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
  }

  /**
   * Configures the job to use TableSnapshotInputFormat to read from a snapshot, generating
   * several input splits per region.
   * @param job the job to configure
   * @param snapshotName the name of the snapshot to read from
   * @param restoreDir a temporary directory to restore the snapshot into. The current user
   *   should have write permissions to this directory, and it should not be a subdirectory of
   *   rootdir. After the job is finished, restoreDir can be deleted.
   * @param splitAlgo split algorithm to generate splits from region
   * @param numSplitsPerRegion how many input splits to generate per one region
   * @throws IOException if an error occurs
   */
  public static void setInput(JobConf job, String snapshotName, Path restoreDir,
      RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo,
      numSplitsPerRegion);
  }
}