/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Filter to support scan multiple row key ranges. It can construct the row key ranges from the
 * passed list which can be accessed by each region server.
 *
 * HBase is quite efficient when scanning only one small row key range. If user needs to specify
 * multiple row key ranges in one scan, the typical solutions are: 1. through FilterList which is a
 * list of row key Filters, 2. using the SQL layer over HBase to join with two tables, such as
 * hive, phoenix etc. However, both solutions are inefficient. Both of them can't utilize the range
 * info to perform fast forwarding during scan which is quite time consuming. If the number of
 * ranges is quite big (e.g. millions), join is a proper solution though it is slow. However, there
 * are cases that user wants to specify a small number of ranges to scan (e.g. &lt;1000 ranges).
 * Both solutions can't provide satisfactory performance in such case. MultiRowRangeFilter is to
 * support such use case (scan multiple row key ranges), which can construct the row key ranges
 * from user specified list and perform fast-forwarding during scan. Thus, the scan will be quite
 * efficient.
 */
@InterfaceAudience.Public
public class MultiRowRangeFilter extends FilterBase {

  /** Sorted, non-overlapping ranges produced by {@link #sortAndMerge(List)}. */
  private List<RowRange> rangeList;

  /** Sentinel returned by {@link #getNextRangeIndex(byte[])} for a row before every range. */
  private static final int ROW_BEFORE_FIRST_RANGE = -1;
  /**
   * Set by {@link #getNextRangeIndex(byte[])} when the row equals the start row of a range whose
   * start is exclusive; consumed (and reset) by {@link #filterRowKey(Cell)}.
   */
  private boolean exclusive = false;
  /** Once true, {@link #filterAllRemaining()} reports that the scan can stop. */
  private boolean done = false;
  private boolean initialized = false;
  /** Index into {@link #rangeList} computed for the most recently examined row. */
  private int index;
  /** Range that rows are currently being matched against. */
  private RowRange range;
  /** Decision for the current row, computed in filterRowKey and returned from filterCell. */
  private ReturnCode currentReturnCode;

  /**
   * @param list A list of <code>RowRange</code>; it is sorted and merged via
   *          {@link #sortAndMerge(List)}
   * @throws IllegalArgumentException if the list is empty or contains an invalid range
   */
  public MultiRowRangeFilter(List<RowRange> list) {
    this.rangeList = sortAndMerge(list);
  }

  @Override
  public boolean filterAllRemaining() {
    return done;
  }

  /**
   * @return the sorted and merged ranges this filter matches against (the internal list, not a
   *         copy)
   */
  public List<RowRange> getRowRanges() {
    return this.rangeList;
  }

  /**
   * Decides how the row of the given cell is to be treated and stores the decision for
   * {@link #filterCell(Cell)}. This method itself only returns true once the filter is done.
   */
  @Override
  public boolean filterRowKey(Cell firstRowCell) {
    if (filterAllRemaining()) {
      return true;
    }
    byte[] rowArr = firstRowCell.getRowArray();
    int length = firstRowCell.getRowLength();
    int offset = firstRowCell.getRowOffset();

    // Fast path: the row is still inside the range we are currently working on.
    if (initialized && range.contains(rowArr, offset, length)) {
      currentReturnCode = ReturnCode.INCLUDE;
      return false;
    }

    // If it is the first time of running, calculate the current range index for
    // the row key. If row key is after the current range, find the next range and update index.
    byte[] rowkey = CellUtil.cloneRow(firstRowCell);
    index = getNextRangeIndex(rowkey);
    if (index >= rangeList.size()) {
      // The row is after the largest stop row of the ranges: stop the scan.
      done = true;
      currentReturnCode = ReturnCode.NEXT_ROW;
      return false;
    }
    range = rangeList.get(index != ROW_BEFORE_FIRST_RANGE ? index : 0);

    if (exclusive) {
      // The row is exactly the (excluded) start row of the range: skip just this row.
      exclusive = false;
      currentReturnCode = ReturnCode.NEXT_ROW;
      return false;
    }

    if (!initialized) {
      currentReturnCode = (index != ROW_BEFORE_FIRST_RANGE)
          ? ReturnCode.INCLUDE
          : ReturnCode.SEEK_NEXT_USING_HINT;
      initialized = true;
    } else {
      currentReturnCode = range.contains(rowArr, offset, length)
          ? ReturnCode.INCLUDE
          : ReturnCode.SEEK_NEXT_USING_HINT;
    }
    return false;
  }

  @Deprecated
  @Override
  public ReturnCode filterKeyValue(final Cell ignored) {
    return filterCell(ignored);
  }

  /**
   * @return the decision computed for the current row by {@link #filterRowKey(Cell)}; the cell
   *         itself is not inspected
   */
  @Override
  public ReturnCode filterCell(final Cell ignored) {
    return currentReturnCode;
  }

  @Override
  public Cell getNextCellHint(Cell currentKV) {
    // skip to the next range's start row
    return PrivateCellUtil.createFirstOnRow(range.startRow, 0,
        (short) range.startRow.length);
  }

  /**
   * @return The filter serialized using pb
   */
  @Override
  public byte[] toByteArray() {
    FilterProtos.MultiRowRangeFilter.Builder builder = FilterProtos.MultiRowRangeFilter
        .newBuilder();
    for (RowRange rowRange : rangeList) {
      if (rowRange == null) {
        continue;
      }
      FilterProtos.RowRange.Builder rangebuilder = FilterProtos.RowRange.newBuilder();
      if (rowRange.startRow != null) {
        rangebuilder.setStartRow(UnsafeByteOperations.unsafeWrap(rowRange.startRow));
      }
      rangebuilder.setStartRowInclusive(rowRange.startRowInclusive);
      if (rowRange.stopRow != null) {
        rangebuilder.setStopRow(UnsafeByteOperations.unsafeWrap(rowRange.stopRow));
      }
      rangebuilder.setStopRowInclusive(rowRange.stopRowInclusive);
      builder.addRowRangeList(rangebuilder.build());
    }
    return builder.build().toByteArray();
  }

  /**
   * @param pbBytes A pb serialized instance
   * @return An instance of MultiRowRangeFilter
   * @throws org.apache.hadoop.hbase.exceptions.DeserializationException if the bytes cannot be
   *           parsed as a serialized MultiRowRangeFilter
   */
  public static MultiRowRangeFilter parseFrom(final byte[] pbBytes)
      throws DeserializationException {
    FilterProtos.MultiRowRangeFilter proto;
    try {
      proto = FilterProtos.MultiRowRangeFilter.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }
    int length = proto.getRowRangeListCount();
    List<FilterProtos.RowRange> rangeProtos = proto.getRowRangeListList();
    List<RowRange> rangeList = new ArrayList<>(length);
    for (FilterProtos.RowRange rangeProto : rangeProtos) {
      byte[] startRow = rangeProto.hasStartRow() ? rangeProto.getStartRow().toByteArray() : null;
      byte[] stopRow = rangeProto.hasStopRow() ? rangeProto.getStopRow().toByteArray() : null;
      rangeList.add(new RowRange(startRow, rangeProto.getStartRowInclusive(), stopRow,
          rangeProto.getStopRowInclusive()));
    }
    return new MultiRowRangeFilter(rangeList);
  }

  /**
   * @param o the filter to compare
   * @return true if and only if the fields of the filter that are serialized are equal to the
   *         corresponding fields in other. Used for testing.
   */
  @Override
  boolean areSerializedFieldsEqual(Filter o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof MultiRowRangeFilter)) {
      return false;
    }

    MultiRowRangeFilter other = (MultiRowRangeFilter) o;
    if (this.rangeList.size() != other.rangeList.size()) {
      return false;
    }
    for (int i = 0; i < rangeList.size(); ++i) {
      RowRange thisRange = this.rangeList.get(i);
      RowRange otherRange = other.rangeList.get(i);
      boolean same = Bytes.equals(thisRange.startRow, otherRange.startRow)
          && Bytes.equals(thisRange.stopRow, otherRange.stopRow)
          && thisRange.startRowInclusive == otherRange.startRowInclusive
          && thisRange.stopRowInclusive == otherRange.stopRowInclusive;
      if (!same) {
        return false;
      }
    }
    return true;
  }

  /**
   * Calculate the position of the row key in the ranges list. As side effects this may mark the
   * filter as initialized and may set the {@code exclusive} flag.
   *
   * @param rowKey the row key to calculate
   * @return the index of the range containing the row key, the index of the first range starting
   *         after it (possibly {@code rangeList.size()}), or {@link #ROW_BEFORE_FIRST_RANGE}
   */
  private int getNextRangeIndex(byte[] rowKey) {
    // RowRange orders by start row only, so this locates the range whose start row equals rowKey.
    RowRange temp = new RowRange(rowKey, true, null, true);
    int found = Collections.binarySearch(rangeList, temp);
    if (found < 0) {
      int insertionPosition = -found - 1;
      // check if the row key is in the range before the insertion position
      if (insertionPosition != 0 && rangeList.get(insertionPosition - 1).contains(rowKey)) {
        return insertionPosition - 1;
      }
      // check if the row key is before the first range
      if (insertionPosition == 0 && !rangeList.get(insertionPosition).contains(rowKey)) {
        return ROW_BEFORE_FIRST_RANGE;
      }
      // The row falls in a gap (or after the last range); mark as initialized so that the caller
      // re-checks containment instead of assuming the row is included.
      initialized = true;
      return insertionPosition;
    }
    // the row key equals one of the start keys, and the range excludes the start key
    if (!rangeList.get(found).startRowInclusive) {
      exclusive = true;
    }
    return found;
  }

  /**
   * Sort the ranges by start row and merge the ones that overlap or touch. Two ranges whose
   * stop/start rows are equal are merged unless both of those bounds are exclusive. An empty stop
   * row means "until the end of the table".
   *
   * <p>The passed list is not modified; sorting is done on a copy.
   *
   * @param ranges the list of ranges to sort and merge.
   * @return a new list holding the ranges after sort and merge, in ascending start row order.
   * @throws IllegalArgumentException if {@code ranges} is empty or contains an invalid range
   */
  public static List<RowRange> sortAndMerge(List<RowRange> ranges) {
    if (ranges.isEmpty()) {
      throw new IllegalArgumentException("No ranges found.");
    }
    List<RowRange> sorted = new ArrayList<>(ranges);
    Collections.sort(sorted);

    // Validate every range exactly once so the reported count is accurate.
    List<RowRange> invalidRanges = new ArrayList<>();
    for (RowRange candidate : sorted) {
      if (!candidate.isValid()) {
        invalidRanges.add(candidate);
      }
    }
    // if invalid range exists, throw the exception
    if (!invalidRanges.isEmpty()) {
      throwExceptionForInvalidRanges(invalidRanges, true);
    }

    List<RowRange> newRanges = new ArrayList<>(sorted.size());
    RowRange first = sorted.get(0);
    byte[] lastStartRow = first.startRow;
    boolean lastStartRowInclusive = first.startRowInclusive;
    byte[] lastStopRow = first.stopRow;
    boolean lastStopRowInclusive = first.stopRowInclusive;

    for (int i = 1; i < sorted.size(); i++) {
      if (Bytes.equals(lastStopRow, HConstants.EMPTY_BYTE_ARRAY)) {
        // The pending range is open-ended, so it covers every later range.
        break;
      }
      RowRange next = sorted.get(i);
      int gap = Bytes.compareTo(lastStopRow, next.startRow);
      boolean overlaps = gap > 0
          || (gap == 0 && (lastStopRowInclusive || next.startRowInclusive));
      if (!overlaps) {
        // Emit the pending range and start a new one.
        newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
            lastStopRowInclusive));
        lastStartRow = next.startRow;
        lastStartRowInclusive = next.startRowInclusive;
        lastStopRow = next.stopRow;
        lastStopRowInclusive = next.stopRowInclusive;
        continue;
      }
      if (Bytes.equals(next.stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
        // The merged range now extends to the end of the table.
        lastStopRow = next.stopRow;
        lastStopRowInclusive = next.stopRowInclusive;
        continue;
      }
      int stopOrder = Bytes.compareTo(lastStopRow, next.stopRow);
      if (stopOrder < 0) {
        lastStopRow = next.stopRow;
        lastStopRowInclusive = next.stopRowInclusive;
      } else if (stopOrder == 0) {
        lastStopRowInclusive = lastStopRowInclusive || next.stopRowInclusive;
      }
      // stopOrder > 0: the pending range strictly contains 'next'; nothing to change.
    }
    newRanges.add(new RowRange(lastStartRow, lastStartRowInclusive, lastStopRow,
        lastStopRowInclusive));
    return newRanges;
  }

  /**
   * Always throws an {@link IllegalArgumentException} describing the given invalid ranges.
   *
   * @param invalidRanges the ranges to report
   * @param details whether to list the start and stop row of every invalid range
   */
  private static void throwExceptionForInvalidRanges(List<RowRange> invalidRanges,
      boolean details) {
    StringBuilder sb = new StringBuilder();
    sb.append(invalidRanges.size()).append(" invalid ranges.\n");
    if (details) {
      for (RowRange invalid : invalidRanges) {
        sb.append("Invalid range: start row => ").append(Bytes.toString(invalid.startRow))
            .append(", stop row => ").append(Bytes.toString(invalid.stopRow)).append('\n');
      }
    }
    throw new IllegalArgumentException(sb.toString());
  }

  /**
   * A row key range with independently inclusive/exclusive bounds. Ranges are ordered by start
   * row only.
   */
  @InterfaceAudience.Public
  public static class RowRange implements Comparable<RowRange> {
    private byte[] startRow;
    private boolean startRowInclusive = true;
    private byte[] stopRow;
    private boolean stopRowInclusive = false;

    public RowRange() {
    }

    /**
     * If the startRow is empty or null, set it to HConstants.EMPTY_BYTE_ARRAY, means begin at the
     * start row of the table. If the stopRow is empty or null, set it to
     * HConstants.EMPTY_BYTE_ARRAY, means end of the last row of table.
     */
    public RowRange(String startRow, boolean startRowInclusive, String stopRow,
        boolean stopRowInclusive) {
      this((startRow == null || startRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
        Bytes.toBytes(startRow), startRowInclusive,
        (stopRow == null || stopRow.isEmpty()) ? HConstants.EMPTY_BYTE_ARRAY :
        Bytes.toBytes(stopRow), stopRowInclusive);
    }

    /**
     * A null startRow or stopRow is replaced by HConstants.EMPTY_BYTE_ARRAY. The arrays are
     * stored as passed, without copying.
     */
    public RowRange(byte[] startRow, boolean startRowInclusive, byte[] stopRow,
        boolean stopRowInclusive) {
      this.startRow = (startRow == null) ? HConstants.EMPTY_BYTE_ARRAY : startRow;
      this.startRowInclusive = startRowInclusive;
      this.stopRow = (stopRow == null) ? HConstants.EMPTY_BYTE_ARRAY : stopRow;
      this.stopRowInclusive = stopRowInclusive;
    }

    public byte[] getStartRow() {
      return startRow;
    }

    public byte[] getStopRow() {
      return stopRow;
    }

    /**
     * @return if start row is inclusive.
     */
    public boolean isStartRowInclusive() {
      return startRowInclusive;
    }

    /**
     * @return if stop row is inclusive.
     */
    public boolean isStopRowInclusive() {
      return stopRowInclusive;
    }

    /**
     * @param row the row key to test
     * @return true if the whole array, taken as a row key, lies within this range
     */
    public boolean contains(byte[] row) {
      return contains(row, 0, row.length);
    }

    /**
     * @param buffer array holding the row key
     * @param offset position of the first byte of the row key in {@code buffer}
     * @param length number of bytes of the row key
     * @return true if the row key lies within this range, honouring the inclusive flags; an empty
     *         stop row places no upper bound
     */
    public boolean contains(byte[] buffer, int offset, int length) {
      int startOrder = Bytes.compareTo(buffer, offset, length, startRow, 0, startRow.length);
      boolean afterStart = startRowInclusive ? startOrder >= 0 : startOrder > 0;
      if (!afterStart) {
        return false;
      }
      if (Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
        return true;
      }
      int stopOrder = Bytes.compareTo(buffer, offset, length, stopRow, 0, stopRow.length);
      return stopRowInclusive ? stopOrder <= 0 : stopOrder < 0;
    }

    /** Orders ranges by start row only; stop rows and inclusive flags are ignored. */
    @Override
    @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_COMPARETO_USE_OBJECT_EQUALS",
      justification="This compareTo is not of this Object, but of referenced RowRange")
    public int compareTo(RowRange other) {
      return Bytes.compareTo(this.startRow, other.startRow);
    }

    /**
     * @return true if either bound is empty, if the start row sorts before the stop row, or if
     *         both rows are equal and the stop row is inclusive
     */
    public boolean isValid() {
      if (Bytes.equals(startRow, HConstants.EMPTY_BYTE_ARRAY)
          || Bytes.equals(stopRow, HConstants.EMPTY_BYTE_ARRAY)) {
        return true;
      }
      int order = Bytes.compareTo(startRow, stopRow);
      return order < 0 || (order == 0 && stopRowInclusive);
    }
  }
}