001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
020import java.util.ArrayList;
021import java.util.Collections;
022import java.util.List;
023
024import org.apache.hadoop.hbase.Cell;
025import org.apache.hadoop.hbase.CellUtil;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.yetus.audience.InterfaceAudience;
029import org.apache.hadoop.hbase.exceptions.DeserializationException;
030import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
031import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
032import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
033import org.apache.hadoop.hbase.util.Bytes;
034
035/**
036 * Filter to support scan multiple row key ranges. It can construct the row key ranges from the
037 * passed list which can be accessed by each region server.
038 *
039 * HBase is quite efficient when scanning only one small row key range. If user needs to specify
040 * multiple row key ranges in one scan, the typical solutions are: 1. through FilterList which is a
041 * list of row key Filters, 2. using the SQL layer over HBase to join with two table, such as hive,
042 * phoenix etc. However, both solutions are inefficient. Both of them can't utilize the range info
043 * to perform fast forwarding during scan which is quite time consuming. If the number of ranges
044 * are quite big (e.g. millions), join is a proper solution though it is slow. However, there are
045 * cases that user wants to specify a small number of ranges to scan (e.g. <1000 ranges). Both
046 * solutions can't provide satisfactory performance in such case. MultiRowRangeFilter is to support
047 * such usec ase (scan multiple row key ranges), which can construct the row key ranges from user
048 * specified list and perform fast-forwarding during scan. Thus, the scan will be quite efficient.
049 */
050@InterfaceAudience.Public
051public class MultiRowRangeFilter extends FilterBase {
052
053  private List<RowRange> rangeList;
054
055  private static final int ROW_BEFORE_FIRST_RANGE = -1;
056  private boolean EXCLUSIVE = false;
057  private boolean done = false;
058  private boolean initialized = false;
059  private int index;
060  private RowRange range;
061  private ReturnCode currentReturnCode;
062
063  /**
064   * @param list A list of <code>RowRange</code>
065   */
066  public MultiRowRangeFilter(List<RowRange> list) {
067    this.rangeList = sortAndMerge(list);
068  }
069
070  @Override
071  public boolean filterAllRemaining() {
072    return done;
073  }
074
075  public List<RowRange> getRowRanges() {
076    return this.rangeList;
077  }
078
079  @Override
080  public boolean filterRowKey(Cell firstRowCell) {
081    if (filterAllRemaining()) return true;
082    // If it is the first time of running, calculate the current range index for
083    // the row key. If index is out of bound which happens when the start row
084    // user sets is after the largest stop row of the ranges, stop the scan.
085    // If row key is after the current range, find the next range and update index.
086    byte[] rowArr = firstRowCell.getRowArray();
087    int length = firstRowCell.getRowLength();
088    int offset = firstRowCell.getRowOffset();
089    if (!initialized
090        || !range.contains(rowArr, offset, length)) {
091      byte[] rowkey = CellUtil.cloneRow(firstRowCell);
092      index = getNextRangeIndex(rowkey);
093      if (index >= rangeList.size()) {
094        done = true;
095        currentReturnCode = ReturnCode.NEXT_ROW;
096        return false;
097      }
098      if(index != ROW_BEFORE_FIRST_RANGE) {
099        range = rangeList.get(index);
100      } else {
101        range = rangeList.get(0);
102      }
103      if (EXCLUSIVE) {
104        EXCLUSIVE = false;
105        currentReturnCode = ReturnCode.NEXT_ROW;
106        return false;
107      }
108      if (!initialized) {
109        if(index != ROW_BEFORE_FIRST_RANGE) {
110          currentReturnCode = ReturnCode.INCLUDE;
111        } else {
112          currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT;
113        }
114        initialized = true;
115      } else {
116        if (range.contains(rowArr, offset, length)) {
117          currentReturnCode = ReturnCode.INCLUDE;
118        } else {
119          currentReturnCode = ReturnCode.SEEK_NEXT_USING_HINT;
120        }
121      }
122    } else {
123      currentReturnCode = ReturnCode.INCLUDE;
124    }
125    return false;
126  }
127
128  @Deprecated
129  @Override
130  public ReturnCode filterKeyValue(final Cell ignored) {
131    return filterCell(ignored);
132  }
133
134  @Override
135  public ReturnCode filterCell(final Cell ignored) {
136    return currentReturnCode;
137  }
138
139  @Override
140  public Cell getNextCellHint(Cell currentKV) {
141    // skip to the next range's start row
142    return PrivateCellUtil.createFirstOnRow(range.startRow, 0,
143        (short) range.startRow.length);
144  }
145
146  /**
147   * @return The filter serialized using pb
148   */
149  @Override
150  public byte[] toByteArray() {
151    FilterProtos.MultiRowRangeFilter.Builder builder = FilterProtos.MultiRowRangeFilter
152        .newBuilder();
153    for (RowRange range : rangeList) {
154      if (range != null) {
155        FilterProtos.RowRange.Builder rangebuilder = FilterProtos.RowRange.newBuilder();
156        if (range.startRow != null)
157          rangebuilder.setStartRow(UnsafeByteOperations.unsafeWrap(range.startRow));
158        rangebuilder.setStartRowInclusive(range.startRowInclusive);
159        if (range.stopRow != null)
160          rangebuilder.setStopRow(UnsafeByteOperations.unsafeWrap(range.stopRow));
161        rangebuilder.setStopRowInclusive(range.stopRowInclusive);
162        builder.addRowRangeList(rangebuilder.build());
163      }
164    }
165    return builder.build().toByteArray();
166  }
167
168  /**
169   * @param pbBytes A pb serialized instance
170   * @return An instance of MultiRowRangeFilter
171   * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
172   */
173  public static MultiRowRangeFilter parseFrom(final byte[] pbBytes)
174      throws DeserializationException {
175    FilterProtos.MultiRowRangeFilter proto;
176    try {
177      proto = FilterProtos.MultiRowRangeFilter.parseFrom(pbBytes);
178    } catch (InvalidProtocolBufferException e) {
179      throw new DeserializationException(e);
180    }
181    int length = proto.getRowRangeListCount();
182    List<FilterProtos.RowRange> rangeProtos = proto.getRowRangeListList();
183    List<RowRange> rangeList = new ArrayList<>(length);
184    for (FilterProtos.RowRange rangeProto : rangeProtos) {
185      RowRange range = new RowRange(rangeProto.hasStartRow() ? rangeProto.getStartRow()
186          .toByteArray() : null, rangeProto.getStartRowInclusive(), rangeProto.hasStopRow() ?
187              rangeProto.getStopRow().toByteArray() : null, rangeProto.getStopRowInclusive());
188      rangeList.add(range);
189    }
190    return new MultiRowRangeFilter(rangeList);
191  }
192
193  /**
194   * @param o the filter to compare
195   * @return true if and only if the fields of the filter that are serialized are equal to the
196   *         corresponding fields in other. Used for testing.
197   */
198  @Override
199  boolean areSerializedFieldsEqual(Filter o) {
200    if (o == this)
201      return true;
202    if (!(o instanceof MultiRowRangeFilter))
203      return false;
204
205    MultiRowRangeFilter other = (MultiRowRangeFilter) o;
206    if (this.rangeList.size() != other.rangeList.size())
207      return false;
208    for (int i = 0; i < rangeList.size(); ++i) {
209      RowRange thisRange = this.rangeList.get(i);
210      RowRange otherRange = other.rangeList.get(i);
211      if (!(Bytes.equals(thisRange.startRow, otherRange.startRow) && Bytes.equals(
212          thisRange.stopRow, otherRange.stopRow) && (thisRange.startRowInclusive ==
213          otherRange.startRowInclusive) && (thisRange.stopRowInclusive ==
214          otherRange.stopRowInclusive))) {
215        return false;
216      }
217    }
218    return true;
219  }
220
221  /**
222   * calculate the position where the row key in the ranges list.
223   *
224   * @param rowKey the row key to calculate
225   * @return index the position of the row key
226   */
227  private int getNextRangeIndex(byte[] rowKey) {
228    RowRange temp = new RowRange(rowKey, true, null, true);
229    int index = Collections.binarySearch(rangeList, temp);
230    if (index < 0) {
231      int insertionPosition = -index - 1;
232      // check if the row key in the range before the insertion position
233      if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) {
234        return insertionPosition - 1;
235      }
236      // check if the row key is before the first range
237      if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) {
238        return ROW_BEFORE_FIRST_RANGE;
239      }
240      if (!initialized) {
241        initialized = true;
242      }
243      return insertionPosition;
244    }
245    // the row key equals one of the start keys, and the the range exclude the start key
246    if(rangeList.get(index).startRowInclusive == false) {
247      EXCLUSIVE = true;
248    }
249    return index;
250  }
251
252  /**
253   * sort the ranges and if the ranges with overlap, then merge them.
254   *
255   * @param ranges the list of ranges to sort and merge.
256   * @return the ranges after sort and merge.
257   */
258  public static List<RowRange> sortAndMerge(List<RowRange> ranges) {
259    if (ranges.isEmpty()) {
260      throw new IllegalArgumentException("No ranges found.");
261    }
262    List<RowRange> invalidRanges = new ArrayList<>();
263    List<RowRange> newRanges = new ArrayList<>(ranges.size());
264    Collections.sort(ranges);
265    if(ranges.get(0).isValid()) {
266      if (ranges.size() == 1) {
267        newRanges.add(ranges.get(0));
268      }
269    } else {
270      invalidRanges.add(ranges.get(0));
271    }
272
273    byte[] lastStartRow = ranges.get(0).startRow;
274    boolean lastStartRowInclusive = ranges.get(0).startRowInclusive;
275    byte[] lastStopRow = ranges.get(0).stopRow;
276    boolean lastStopRowInclusive = ranges.get(0).stopRowInclusive;
277    int i = 1;
278    for (; i < ranges.size(); i++) {
279      RowRange range = ranges.get(i);
280      if (!range.isValid()) {
281        invalidRanges.add(range);
282      }
283      if(Bytes.equals(lastStopRow, HConstants.EMPTY_BYTE_ARRAY)) {
284        newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
285            lastStopRowInclusive));
286        break;
287      }
288      // with overlap in the ranges
289      if ((Bytes.compareTo(lastStopRow, range.startRow) > 0) ||
290          (Bytes.compareTo(lastStopRow, range.startRow) == 0 && !(lastStopRowInclusive == false &&
291          range.isStartRowInclusive() == false))) {
292        if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
293          newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, range.stopRow,
294              range.stopRowInclusive));
295          break;
296        }
297        // if first range contains second range, ignore the second range
298        if (Bytes.compareTo(lastStopRow, range.stopRow) >= 0) {
299          if((Bytes.compareTo(lastStopRow, range.stopRow) == 0)) {
300            if(lastStopRowInclusive == true || range.stopRowInclusive == true) {
301              lastStopRowInclusive = true;
302            }
303          }
304          if ((i + 1) == ranges.size()) {
305            newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
306                lastStopRowInclusive));
307          }
308        } else {
309          lastStopRow = range.stopRow;
310          lastStopRowInclusive = range.stopRowInclusive;
311          if ((i + 1) < ranges.size()) {
312            i++;
313            range = ranges.get(i);
314            if (!range.isValid()) {
315              invalidRanges.add(range);
316            }
317          } else {
318            newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
319                lastStopRowInclusive));
320            break;
321          }
322          while ((Bytes.compareTo(lastStopRow, range.startRow) > 0) ||
323              (Bytes.compareTo(lastStopRow, range.startRow) == 0 &&
324              (lastStopRowInclusive == true || range.startRowInclusive==true))) {
325            if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
326              break;
327            }
328            // if this first range contain second range, ignore the second range
329            if (Bytes.compareTo(lastStopRow, range.stopRow) >= 0) {
330              if(lastStopRowInclusive == true || range.stopRowInclusive == true) {
331                lastStopRowInclusive = true;
332              }
333              i++;
334              if (i < ranges.size()) {
335                range = ranges.get(i);
336                if (!range.isValid()) {
337                  invalidRanges.add(range);
338                }
339              } else {
340                break;
341              }
342            } else {
343              lastStopRow = range.stopRow;
344              lastStopRowInclusive = range.stopRowInclusive;
345              i++;
346              if (i < ranges.size()) {
347                range = ranges.get(i);
348                if (!range.isValid()) {
349                  invalidRanges.add(range);
350                }
351              } else {
352                break;
353              }
354            }
355          }
356          if(Bytes.equals(range.stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
357            if((Bytes.compareTo(lastStopRow, range.startRow) < 0) ||
358                (Bytes.compareTo(lastStopRow, range.startRow) == 0 &&
359                lastStopRowInclusive == false && range.startRowInclusive == false)) {
360              newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
361                  lastStopRowInclusive));
362              newRanges.add(range);
363            } else {
364              newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, range.stopRow,
365                  range.stopRowInclusive));
366              break;
367            }
368          }
369          newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
370              lastStopRowInclusive));
371          if ((i + 1) == ranges.size()) {
372            newRanges.add(range);
373          }
374          lastStartRow = range.startRow;
375          lastStartRowInclusive = range.startRowInclusive;
376          lastStopRow = range.stopRow;
377          lastStopRowInclusive = range.stopRowInclusive;
378        }
379      } else {
380        newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
381            lastStopRowInclusive));
382        if ((i + 1) == ranges.size()) {
383          newRanges.add(range);
384        }
385        lastStartRow = range.startRow;
386        lastStartRowInclusive = range.startRowInclusive;
387        lastStopRow = range.stopRow;
388        lastStopRowInclusive = range.stopRowInclusive;
389      }
390    }
391    // check the remaining ranges
392    for(int j=i; j < ranges.size(); j++) {
393      if(!ranges.get(j).isValid()) {
394        invalidRanges.add(ranges.get(j));
395      }
396    }
397    // if invalid range exists, throw the exception
398    if (invalidRanges.size() != 0) {
399      throwExceptionForInvalidRanges(invalidRanges, true);
400    }
401    // If no valid ranges found, throw the exception
402    if(newRanges.isEmpty()) {
403      throw new IllegalArgumentException("No valid ranges found.");
404    }
405    return newRanges;
406  }
407
408  private static void throwExceptionForInvalidRanges(List<RowRange> invalidRanges,
409      boolean details) {
410    StringBuilder sb = new StringBuilder();
411    sb.append(invalidRanges.size()).append(" invaild ranges.\n");
412    if (details) {
413      for (RowRange range : invalidRanges) {
414        sb.append(
415            "Invalid range: start row => " + Bytes.toString(range.startRow) + ", stop row => "
416                + Bytes.toString(range.stopRow)).append('\n');
417      }
418    }
419    throw new IllegalArgumentException(sb.toString());
420  }
421
422  @InterfaceAudience.Public
423  public static class RowRange implements Comparable<RowRange> {
424    private byte[] startRow;
425    private boolean startRowInclusive = true;
426    private byte[] stopRow;
427    private boolean stopRowInclusive = false;
428
429    public RowRange() {
430    }
431    /**
432     * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
433     * start row of the table. If the stopRow is empty or null, set it to
434     * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
435     */
436    public RowRange(String startRow, boolean startRowInclusive, String stopRow,
437        boolean stopRowInclusive) {
438      this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
439        Bytes.toBytes(startRow), startRowInclusive,
440        (stopRow == null || stopRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
441        Bytes.toBytes(stopRow), stopRowInclusive);
442    }
443
444    public RowRange(byte[] startRow,  boolean startRowInclusive, byte[] stopRow,
445        boolean stopRowInclusive) {
446      this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow;
447      this.startRowInclusive = startRowInclusive;
448      this.stopRow = (stopRow == null) ? HConstants.EMPTY_BYTE_ARRAY :stopRow;
449      this.stopRowInclusive = stopRowInclusive;
450    }
451
452    public byte[] getStartRow() {
453      return startRow;
454    }
455
456    public byte[] getStopRow() {
457      return stopRow;
458    }
459
460    /**
461     * @return if start row is inclusive.
462     */
463    public boolean isStartRowInclusive() {
464      return startRowInclusive;
465    }
466
467    /**
468     * @return if stop row is inclusive.
469     */
470    public boolean isStopRowInclusive() {
471      return stopRowInclusive;
472    }
473
474    public boolean contains(byte[] row) {
475      return contains(row, 0, row.length);
476    }
477
478    public boolean contains(byte[] buffer, int offset, int length) {
479      if(startRowInclusive) {
480        if(stopRowInclusive) {
481          return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) >= 0
482              && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) ||
483                  Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) <= 0);
484        } else {
485          return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) >= 0
486              && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) ||
487                  Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) < 0);
488        }
489      } else {
490        if(stopRowInclusive) {
491          return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) > 0
492              && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) ||
493                  Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) <= 0);
494        } else {
495          return Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length) > 0
496              && (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY) ||
497                  Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length) < 0);
498        }
499      }
500    }
501
502    @Override
503    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
504      justification="This compareTo is not of this Object, but of referenced RowRange")
505    public int compareTo(RowRange other) {
506      return Bytes.compareTo(this.startRow, other.startRow);
507    }
508
509    public boolean isValid() {
510      return Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY)
511          || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)
512          || Bytes.compareTo(startRow, stopRow) < 0
513          || (Bytes.compareTo(startRow, stopRow) == 0 && stopRowInclusive == true);
514    }
515  }
516}