001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.mapred;
020
021import org.apache.hadoop.fs.Path;
022import org.apache.hadoop.hbase.HRegionInfo;
023import org.apache.hadoop.hbase.HTableDescriptor;
024import org.apache.yetus.audience.InterfaceAudience;
025import org.apache.hadoop.hbase.client.Result;
026import org.apache.hadoop.hbase.client.Scan;
027import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
028import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
029import org.apache.hadoop.hbase.util.RegionSplitter;
030import org.apache.hadoop.mapred.InputFormat;
031import org.apache.hadoop.mapred.InputSplit;
032import org.apache.hadoop.mapred.JobConf;
033import org.apache.hadoop.mapred.RecordReader;
034import org.apache.hadoop.mapred.Reporter;
035import org.apache.hadoop.mapreduce.Job;
036
037import java.io.DataInput;
038import java.io.DataOutput;
039import java.io.IOException;
040import java.util.List;
041
042/**
043 * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
044 * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
045 *
046 * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
047 */
048@InterfaceAudience.Public
049public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
050
051  public static class TableSnapshotRegionSplit implements InputSplit {
052    private TableSnapshotInputFormatImpl.InputSplit delegate;
053
054    // constructor for mapreduce framework / Writable
055    public TableSnapshotRegionSplit() {
056      this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
057    }
058
059    public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
060      this.delegate = delegate;
061    }
062
063    public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
064        List<String> locations, Scan scan, Path restoreDir) {
065      this.delegate =
066          new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
067    }
068
069    @Override
070    public long getLength() throws IOException {
071      return delegate.getLength();
072    }
073
074    @Override
075    public String[] getLocations() throws IOException {
076      return delegate.getLocations();
077    }
078
079    @Override
080    public void write(DataOutput out) throws IOException {
081      delegate.write(out);
082    }
083
084    @Override
085    public void readFields(DataInput in) throws IOException {
086      delegate.readFields(in);
087    }
088  }
089
090  static class TableSnapshotRecordReader
091    implements RecordReader<ImmutableBytesWritable, Result> {
092
093    private TableSnapshotInputFormatImpl.RecordReader delegate;
094
095    public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
096        throws IOException {
097      delegate = new TableSnapshotInputFormatImpl.RecordReader();
098      delegate.initialize(split.delegate, job);
099    }
100
101    @Override
102    public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
103      if (!delegate.nextKeyValue()) {
104        return false;
105      }
106      ImmutableBytesWritable currentKey = delegate.getCurrentKey();
107      key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
108      value.copyFrom(delegate.getCurrentValue());
109      return true;
110    }
111
112    @Override
113    public ImmutableBytesWritable createKey() {
114      return new ImmutableBytesWritable();
115    }
116
117    @Override
118    public Result createValue() {
119      return new Result();
120    }
121
122    @Override
123    public long getPos() throws IOException {
124      return delegate.getPos();
125    }
126
127    @Override
128    public void close() throws IOException {
129      delegate.close();
130    }
131
132    @Override
133    public float getProgress() throws IOException {
134      return delegate.getProgress();
135    }
136  }
137
138  @Override
139  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
140    List<TableSnapshotInputFormatImpl.InputSplit> splits =
141      TableSnapshotInputFormatImpl.getSplits(job);
142    InputSplit[] results = new InputSplit[splits.size()];
143    for (int i = 0; i < splits.size(); i++) {
144      results[i] = new TableSnapshotRegionSplit(splits.get(i));
145    }
146    return results;
147  }
148
149  @Override
150  public RecordReader<ImmutableBytesWritable, Result>
151  getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException {
152    return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
153  }
154
155  /**
156   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
157   * @param job the job to configure
158   * @param snapshotName the name of the snapshot to read from
159   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
160   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
161   * After the job is finished, restoreDir can be deleted.
162   * @throws IOException if an error occurs
163   */
164  public static void setInput(JobConf job, String snapshotName, Path restoreDir)
165      throws IOException {
166    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
167  }
168
169  /**
170   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
171   * @param job the job to configure
172   * @param snapshotName the name of the snapshot to read from
173   * @param restoreDir a temporary directory to restore the snapshot into. Current user should
174   * have write permissions to this directory, and this should not be a subdirectory of rootdir.
175   * After the job is finished, restoreDir can be deleted.
176   * @param splitAlgo split algorithm to generate splits from region
177   * @param numSplitsPerRegion how many input splits to generate per one region
178   * @throws IOException if an error occurs
179   */
180  public static void setInput(JobConf job, String snapshotName, Path restoreDir,
181                              RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
182    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo, numSplitsPerRegion);
183  }
184}