001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase; 020 021import java.io.DataInput; 022import java.io.DataOutput; 023import java.io.EOFException; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.OutputStream; 027import java.nio.ByteBuffer; 028import java.util.ArrayList; 029import java.util.List; 030 031import org.apache.hadoop.hbase.KeyValue.Type; 032import org.apache.hadoop.hbase.io.util.StreamUtils; 033import org.apache.hadoop.hbase.util.ByteBufferUtils; 034import org.apache.hadoop.hbase.util.Bytes; 035import org.apache.hadoop.io.IOUtils; 036import org.apache.hadoop.io.WritableUtils; 037import org.apache.yetus.audience.InterfaceAudience; 038 039import org.apache.hbase.thirdparty.com.google.common.base.Function; 040import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 041import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils; 042 043/** 044 * static convenience methods for dealing with KeyValues and collections of KeyValues 045 */ 046@InterfaceAudience.Private 047public class KeyValueUtil { 048 049 /**************** length *********************/ 050 051 /** 052 * Returns number of bytes this cell would have been used if serialized as in {@link KeyValue} 053 * @param cell 054 * @return the length 055 */ 056 public static int length(final Cell cell) { 057 return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(), 058 cell.getValueLength(), cell.getTagsLength(), true); 059 } 060 061 public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) { 062 if (withTags) { 063 return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen)); 064 } 065 return (int) (KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen)); 066 } 067 068 /** 069 * Returns number of bytes this cell's key part would have been used if serialized as in 070 * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type. 071 * @param cell 072 * @return the key length 073 */ 074 public static int keyLength(final Cell cell) { 075 return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength()); 076 } 077 078 private static int keyLength(short rlen, byte flen, int qlen) { 079 return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen); 080 } 081 082 public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) { 083 int length = kv.getLength(); 084 if (includeMvccVersion) { 085 length += WritableUtils.getVIntSize(kv.getSequenceId()); 086 } 087 return length; 088 } 089 090 public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs, 091 final boolean includeMvccVersion) { 092 int length = 0; 093 for (KeyValue kv : IterableUtils.emptyIfNull(kvs)) { 094 length += lengthWithMvccVersion(kv, includeMvccVersion); 095 } 096 return length; 097 } 098 099 100 /**************** copy the cell to create a new keyvalue *********************/ 101 102 public static KeyValue copyToNewKeyValue(final Cell cell) { 103 byte[] bytes = copyToNewByteArray(cell); 104 KeyValue kvCell = new KeyValue(bytes, 0, bytes.length); 105 kvCell.setSequenceId(cell.getSequenceId()); 106 return kvCell; 107 } 108 109 /** 110 * The position will be set to the beginning of the new ByteBuffer 111 * @param cell 112 * @return the Bytebuffer containing the key part of the cell 113 */ 114 public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) { 115 byte[] bytes = new byte[keyLength(cell)]; 116 appendKeyTo(cell, bytes, 0); 117 ByteBuffer buffer = ByteBuffer.wrap(bytes); 118 return buffer; 119 } 120 121 /** 122 * Copies the key to a new KeyValue 123 * @param cell 124 * @return the KeyValue that consists only the key part of the incoming cell 125 */ 126 public static KeyValue toNewKeyCell(final Cell cell) { 127 byte[] bytes = new byte[keyLength(cell)]; 128 appendKeyTo(cell, bytes, 0); 129 KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length); 130 // Set the seq id. The new key cell could be used in comparisons so it 131 // is important that it uses the seqid also. If not the comparsion would fail 132 kv.setSequenceId(cell.getSequenceId()); 133 return kv; 134 } 135 136 public static byte[] copyToNewByteArray(final Cell cell) { 137 int v1Length = length(cell); 138 byte[] backingBytes = new byte[v1Length]; 139 appendToByteArray(cell, backingBytes, 0, true); 140 return backingBytes; 141 } 142 143 public static int appendKeyTo(final Cell cell, final byte[] output, 144 final int offset) { 145 int nextOffset = offset; 146 nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength()); 147 nextOffset = CellUtil.copyRowTo(cell, output, nextOffset); 148 nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength()); 149 nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset); 150 nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset); 151 nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp()); 152 nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte()); 153 return nextOffset; 154 } 155 156 /**************** copy key and value *********************/ 157 158 public static int appendToByteArray(Cell cell, byte[] output, int offset, boolean withTags) { 159 int pos = offset; 160 pos = Bytes.putInt(output, pos, keyLength(cell)); 161 pos = Bytes.putInt(output, pos, cell.getValueLength()); 162 pos = appendKeyTo(cell, output, pos); 163 pos = CellUtil.copyValueTo(cell, output, pos); 164 if (withTags && (cell.getTagsLength() > 0)) { 165 pos = Bytes.putAsShort(output, pos, cell.getTagsLength()); 166 pos = PrivateCellUtil.copyTagsTo(cell, output, pos); 167 } 168 return pos; 169 } 170 171 /** 172 * Copy the Cell content into the passed buf in KeyValue serialization format. 173 */ 174 public static int appendTo(Cell cell, ByteBuffer buf, int offset, boolean withTags) { 175 offset = ByteBufferUtils.putInt(buf, offset, keyLength(cell));// Key length 176 offset = ByteBufferUtils.putInt(buf, offset, cell.getValueLength());// Value length 177 offset = appendKeyTo(cell, buf, offset); 178 offset = CellUtil.copyValueTo(cell, buf, offset);// Value bytes 179 int tagsLength = cell.getTagsLength(); 180 if (withTags && (tagsLength > 0)) { 181 offset = ByteBufferUtils.putAsShort(buf, offset, tagsLength);// Tags length 182 offset = PrivateCellUtil.copyTagsTo(cell, buf, offset);// Tags bytes 183 } 184 return offset; 185 } 186 187 public static int appendKeyTo(Cell cell, ByteBuffer buf, int offset) { 188 offset = ByteBufferUtils.putShort(buf, offset, cell.getRowLength());// RK length 189 offset = CellUtil.copyRowTo(cell, buf, offset);// Row bytes 190 offset = ByteBufferUtils.putByte(buf, offset, cell.getFamilyLength());// CF length 191 offset = CellUtil.copyFamilyTo(cell, buf, offset);// CF bytes 192 offset = CellUtil.copyQualifierTo(cell, buf, offset);// Qualifier bytes 193 offset = ByteBufferUtils.putLong(buf, offset, cell.getTimestamp());// TS 194 offset = ByteBufferUtils.putByte(buf, offset, cell.getTypeByte());// Type 195 return offset; 196 } 197 198 public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv, 199 final boolean includeMvccVersion) { 200 // keep pushing the limit out. assume enough capacity 201 bb.limit(bb.position() + kv.getLength()); 202 bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength()); 203 if (includeMvccVersion) { 204 int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId()); 205 ByteBufferUtils.extendLimit(bb, numMvccVersionBytes); 206 ByteBufferUtils.writeVLong(bb, kv.getSequenceId()); 207 } 208 } 209 210 211 /**************** iterating *******************************/ 212 213 /** 214 * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's 215 * position to the start of the next KeyValue. Does not allocate a new array or copy data. 216 * @param bb 217 * @param includesMvccVersion 218 * @param includesTags 219 */ 220 public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion, 221 boolean includesTags) { 222 if (bb.isDirect()) { 223 throw new IllegalArgumentException("only supports heap buffers"); 224 } 225 if (bb.remaining() < 1) { 226 return null; 227 } 228 KeyValue keyValue = null; 229 int underlyingArrayOffset = bb.arrayOffset() + bb.position(); 230 int keyLength = bb.getInt(); 231 int valueLength = bb.getInt(); 232 ByteBufferUtils.skip(bb, keyLength + valueLength); 233 int tagsLength = 0; 234 if (includesTags) { 235 // Read short as unsigned, high byte first 236 tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff); 237 ByteBufferUtils.skip(bb, tagsLength); 238 } 239 int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); 240 keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); 241 if (includesMvccVersion) { 242 long mvccVersion = ByteBufferUtils.readVLong(bb); 243 keyValue.setSequenceId(mvccVersion); 244 } 245 return keyValue; 246 } 247 248 249 /*************** next/previous **********************************/ 250 251 /** 252 * Decrement the timestamp. For tests (currently wasteful) 253 * 254 * Remember timestamps are sorted reverse chronologically. 255 * @param in 256 * @return previous key 257 */ 258 public static KeyValue previousKey(final KeyValue in) { 259 return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in), 260 CellUtil.cloneQualifier(in), in.getTimestamp() - 1); 261 } 262 263 264 /** 265 * Create a KeyValue for the specified row, family and qualifier that would be 266 * larger than or equal to all other possible KeyValues that have the same 267 * row, family, qualifier. Used for reseeking. Should NEVER be returned to a client. 268 * 269 * @param row 270 * row key 271 * @param roffset 272 * row offset 273 * @param rlength 274 * row length 275 * @param family 276 * family name 277 * @param foffset 278 * family offset 279 * @param flength 280 * family length 281 * @param qualifier 282 * column qualifier 283 * @param qoffset 284 * qualifier offset 285 * @param qlength 286 * qualifier length 287 * @return Last possible key on passed row, family, qualifier. 288 */ 289 public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength, 290 final byte[] family, final int foffset, final int flength, final byte[] qualifier, 291 final int qoffset, final int qlength) { 292 return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, 293 qlength, HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0); 294 } 295 296 /** 297 * Create a KeyValue that is smaller than all other possible KeyValues 298 * for the given row. That is any (valid) KeyValue on 'row' would sort 299 * _after_ the result. 300 * 301 * @param row - row key (arbitrary byte array) 302 * @return First possible KeyValue on passed <code>row</code> 303 */ 304 public static KeyValue createFirstOnRow(final byte [] row, int roffset, short rlength) { 305 return new KeyValue(row, roffset, rlength, 306 null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0); 307 } 308 309 /** 310 * Creates a KeyValue that is last on the specified row id. That is, 311 * every other possible KeyValue for the given row would compareTo() 312 * less than the result of this call. 313 * @param row row key 314 * @return Last possible KeyValue on passed <code>row</code> 315 */ 316 public static KeyValue createLastOnRow(final byte[] row) { 317 return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, Type.Minimum); 318 } 319 320 /** 321 * Create a KeyValue that is smaller than all other possible KeyValues 322 * for the given row. That is any (valid) KeyValue on 'row' would sort 323 * _after_ the result. 324 * 325 * @param row - row key (arbitrary byte array) 326 * @return First possible KeyValue on passed <code>row</code> 327 */ 328 public static KeyValue createFirstOnRow(final byte [] row) { 329 return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP); 330 } 331 332 /** 333 * Creates a KeyValue that is smaller than all other KeyValues that 334 * are older than the passed timestamp. 335 * @param row - row key (arbitrary byte array) 336 * @param ts - timestamp 337 * @return First possible key on passed <code>row</code> and timestamp. 338 */ 339 public static KeyValue createFirstOnRow(final byte [] row, 340 final long ts) { 341 return new KeyValue(row, null, null, ts, Type.Maximum); 342 } 343 344 /** 345 * Create a KeyValue for the specified row, family and qualifier that would be 346 * smaller than all other possible KeyValues that have the same row,family,qualifier. 347 * Used for seeking. 348 * @param row - row key (arbitrary byte array) 349 * @param family - family name 350 * @param qualifier - column qualifier 351 * @return First possible key on passed <code>row</code>, and column. 352 */ 353 public static KeyValue createFirstOnRow(final byte [] row, final byte [] family, 354 final byte [] qualifier) { 355 return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum); 356 } 357 358 /** 359 * @param row - row key (arbitrary byte array) 360 * @param f - family name 361 * @param q - column qualifier 362 * @param ts - timestamp 363 * @return First possible key on passed <code>row</code>, column and timestamp 364 */ 365 public static KeyValue createFirstOnRow(final byte [] row, final byte [] f, 366 final byte [] q, final long ts) { 367 return new KeyValue(row, f, q, ts, Type.Maximum); 368 } 369 370 /** 371 * Create a KeyValue for the specified row, family and qualifier that would be 372 * smaller than all other possible KeyValues that have the same row, 373 * family, qualifier. 374 * Used for seeking. 375 * @param row row key 376 * @param roffset row offset 377 * @param rlength row length 378 * @param family family name 379 * @param foffset family offset 380 * @param flength family length 381 * @param qualifier column qualifier 382 * @param qoffset qualifier offset 383 * @param qlength qualifier length 384 * @return First possible key on passed Row, Family, Qualifier. 385 */ 386 public static KeyValue createFirstOnRow(final byte [] row, 387 final int roffset, final int rlength, final byte [] family, 388 final int foffset, final int flength, final byte [] qualifier, 389 final int qoffset, final int qlength) { 390 return new KeyValue(row, roffset, rlength, family, 391 foffset, flength, qualifier, qoffset, qlength, 392 HConstants.LATEST_TIMESTAMP, Type.Maximum, null, 0, 0); 393 } 394 395 /** 396 * Create a KeyValue for the specified row, family and qualifier that would be 397 * smaller than all other possible KeyValues that have the same row, 398 * family, qualifier. 399 * Used for seeking. 400 * 401 * @param buffer the buffer to use for the new <code>KeyValue</code> object 402 * @param row the value key 403 * @param family family name 404 * @param qualifier column qualifier 405 * 406 * @return First possible key on passed Row, Family, Qualifier. 407 * 408 * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger 409 * than the provided buffer or than <code>Integer.MAX_VALUE</code> 410 */ 411 public static KeyValue createFirstOnRow(byte [] buffer, final byte [] row, 412 final byte [] family, final byte [] qualifier) 413 throws IllegalArgumentException { 414 return createFirstOnRow(buffer, 0, row, 0, row.length, 415 family, 0, family.length, 416 qualifier, 0, qualifier.length); 417 } 418 419 /** 420 * Create a KeyValue for the specified row, family and qualifier that would be 421 * smaller than all other possible KeyValues that have the same row, 422 * family, qualifier. 423 * Used for seeking. 424 * 425 * @param buffer the buffer to use for the new <code>KeyValue</code> object 426 * @param boffset buffer offset 427 * @param row the value key 428 * @param roffset row offset 429 * @param rlength row length 430 * @param family family name 431 * @param foffset family offset 432 * @param flength family length 433 * @param qualifier column qualifier 434 * @param qoffset qualifier offset 435 * @param qlength qualifier length 436 * 437 * @return First possible key on passed Row, Family, Qualifier. 438 * 439 * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger 440 * than the provided buffer or than <code>Integer.MAX_VALUE</code> 441 */ 442 public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row, 443 final int roffset, final int rlength, final byte[] family, final int foffset, 444 final int flength, final byte[] qualifier, final int qoffset, final int qlength) 445 throws IllegalArgumentException { 446 447 long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0); 448 449 if (lLength > Integer.MAX_VALUE) { 450 throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE); 451 } 452 int iLength = (int) lLength; 453 if (buffer.length - boffset < iLength) { 454 throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < " 455 + iLength); 456 } 457 458 int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset, 459 flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum, 460 null, 0, 0, null); 461 return new KeyValue(buffer, boffset, len); 462 } 463 464 /*************** misc **********************************/ 465 /** 466 * @param cell 467 * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return a 468 * new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an 469 * object of any of the subclass of {@link KeyValue}, we will create a new 470 * {@link KeyValue} object wrapping same buffer. This API is used only with MR based tools 471 * which expect the type to be exactly KeyValue. That is the reason for doing this way. 472 * @deprecated without any replacement. 473 */ 474 @Deprecated 475 public static KeyValue ensureKeyValue(final Cell cell) { 476 if (cell == null) return null; 477 if (cell instanceof KeyValue) { 478 if (cell.getClass().getName().equals(KeyValue.class.getName())) { 479 return (KeyValue) cell; 480 } 481 // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the 482 // same byte[] 483 KeyValue kv = (KeyValue) cell; 484 KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length); 485 newKv.setSequenceId(kv.getSequenceId()); 486 return newKv; 487 } 488 return copyToNewKeyValue(cell); 489 } 490 491 @Deprecated 492 public static List<KeyValue> ensureKeyValues(List<Cell> cells) { 493 List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() { 494 @Override 495 public KeyValue apply(Cell arg0) { 496 return KeyValueUtil.ensureKeyValue(arg0); 497 } 498 }); 499 return new ArrayList<>(lazyList); 500 } 501 /** 502 * Write out a KeyValue in the manner in which we used to when KeyValue was a 503 * Writable. 504 * 505 * @param kv 506 * @param out 507 * @return Length written on stream 508 * @throws IOException 509 * @see #create(DataInput) for the inverse function 510 */ 511 public static long write(final KeyValue kv, final DataOutput out) throws IOException { 512 // This is how the old Writables write used to serialize KVs. Need to figure 513 // way to make it 514 // work for all implementations. 515 int length = kv.getLength(); 516 out.writeInt(length); 517 out.write(kv.getBuffer(), kv.getOffset(), length); 518 return (long) length + Bytes.SIZEOF_INT; 519 } 520 521 static String bytesToHex(byte[] buf, int offset, int length) { 522 return ", KeyValueBytesHex=" + Bytes.toStringBinary(buf, offset, length) + ", offset=" + offset 523 + ", length=" + length; 524 } 525 526 static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) { 527 int pos = offset, endOffset = offset + length; 528 // check the key 529 if (pos + Bytes.SIZEOF_INT > endOffset) { 530 throw new IllegalArgumentException( 531 "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length)); 532 } 533 int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT); 534 pos += Bytes.SIZEOF_INT; 535 if (keyLen <= 0 || pos + keyLen > endOffset) { 536 throw new IllegalArgumentException( 537 "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length)); 538 } 539 // check the value 540 if (pos + Bytes.SIZEOF_INT > endOffset) { 541 throw new IllegalArgumentException("Overflow when reading value length at position=" + pos 542 + bytesToHex(buf, offset, length)); 543 } 544 int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT); 545 pos += Bytes.SIZEOF_INT; 546 if (valLen < 0 || pos + valLen > endOffset) { 547 throw new IllegalArgumentException("Invalid value length in KeyValue, valueLength=" + valLen 548 + bytesToHex(buf, offset, length)); 549 } 550 // check the row 551 if (pos + Bytes.SIZEOF_SHORT > endOffset) { 552 throw new IllegalArgumentException( 553 "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length)); 554 } 555 short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT); 556 pos += Bytes.SIZEOF_SHORT; 557 if (rowLen < 0 || pos + rowLen > endOffset) { 558 throw new IllegalArgumentException( 559 "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length)); 560 } 561 pos += rowLen; 562 // check the family 563 if (pos + Bytes.SIZEOF_BYTE > endOffset) { 564 throw new IllegalArgumentException("Overflow when reading family length at position=" + pos 565 + bytesToHex(buf, offset, length)); 566 } 567 int familyLen = buf[pos]; 568 pos += Bytes.SIZEOF_BYTE; 569 if (familyLen < 0 || pos + familyLen > endOffset) { 570 throw new IllegalArgumentException("Invalid family length in KeyValue, familyLength=" 571 + familyLen + bytesToHex(buf, offset, length)); 572 } 573 pos += familyLen; 574 // check the qualifier 575 int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen 576 - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE; 577 if (qualifierLen < 0 || pos + qualifierLen > endOffset) { 578 throw new IllegalArgumentException("Invalid qualifier length in KeyValue, qualifierLen=" 579 + qualifierLen + bytesToHex(buf, offset, length)); 580 } 581 pos += qualifierLen; 582 // check the timestamp 583 if (pos + Bytes.SIZEOF_LONG > endOffset) { 584 throw new IllegalArgumentException( 585 "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length)); 586 } 587 long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG); 588 if (timestamp < 0) { 589 throw new IllegalArgumentException( 590 "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length)); 591 } 592 pos += Bytes.SIZEOF_LONG; 593 // check the type 594 if (pos + Bytes.SIZEOF_BYTE > endOffset) { 595 throw new IllegalArgumentException( 596 "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length)); 597 } 598 byte type = buf[pos]; 599 if (!Type.isValidType(type)) { 600 throw new IllegalArgumentException( 601 "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length)); 602 } 603 pos += Bytes.SIZEOF_BYTE; 604 // check the value 605 if (pos + valLen > endOffset) { 606 throw new IllegalArgumentException( 607 "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length)); 608 } 609 pos += valLen; 610 // check the tags 611 if (withTags) { 612 if (pos == endOffset) { 613 // withTags is true but no tag in the cell. 614 return; 615 } 616 if (pos + Bytes.SIZEOF_SHORT > endOffset) { 617 throw new IllegalArgumentException("Overflow when reading tags length at position=" + pos 618 + bytesToHex(buf, offset, length)); 619 } 620 short tagsLen = Bytes.toShort(buf, pos); 621 pos += Bytes.SIZEOF_SHORT; 622 if (tagsLen < 0 || pos + tagsLen > endOffset) { 623 throw new IllegalArgumentException("Invalid tags length in KeyValue at position=" 624 + (pos - Bytes.SIZEOF_SHORT) + bytesToHex(buf, offset, length)); 625 } 626 int tagsEndOffset = pos + tagsLen; 627 for (; pos < tagsEndOffset;) { 628 if (pos + Tag.TAG_LENGTH_SIZE > endOffset) { 629 throw new IllegalArgumentException("Overflow when reading tag length at position=" + pos 630 + bytesToHex(buf, offset, length)); 631 } 632 short tagLen = Bytes.toShort(buf, pos); 633 pos += Tag.TAG_LENGTH_SIZE; 634 // tagLen contains one byte tag type, so must be not less than 1. 635 if (tagLen < 1 || pos + tagLen > endOffset) { 636 throw new IllegalArgumentException( 637 "Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE) + ", tagLength=" 638 + tagLen + bytesToHex(buf, offset, length)); 639 } 640 pos += tagLen; 641 } 642 } 643 if (pos != endOffset) { 644 throw new IllegalArgumentException("Some redundant bytes in KeyValue's buffer, startOffset=" 645 + pos + ", endOffset=" + endOffset + bytesToHex(buf, offset, length)); 646 } 647 } 648 649 /** 650 * Create a KeyValue reading from the raw InputStream. Named 651 * <code>createKeyValueFromInputStream</code> so doesn't clash with {@link #create(DataInput)} 652 * @param in inputStream to read. 653 * @param withTags whether the keyvalue should include tags are not 654 * @return Created KeyValue OR if we find a length of zero, we will return null which can be 655 * useful marking a stream as done. 656 * @throws IOException 657 */ 658 public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags) 659 throws IOException { 660 byte[] intBytes = new byte[Bytes.SIZEOF_INT]; 661 int bytesRead = 0; 662 while (bytesRead < intBytes.length) { 663 int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead); 664 if (n < 0) { 665 if (bytesRead == 0) { 666 throw new EOFException(); 667 } 668 throw new IOException("Failed read of int, read " + bytesRead + " bytes"); 669 } 670 bytesRead += n; 671 } 672 byte[] bytes = new byte[Bytes.toInt(intBytes)]; 673 IOUtils.readFully(in, bytes, 0, bytes.length); 674 return withTags ? new KeyValue(bytes, 0, bytes.length) 675 : new NoTagsKeyValue(bytes, 0, bytes.length); 676 } 677 678 /** 679 * @param b 680 * @return A KeyValue made of a byte array that holds the key-only part. 681 * Needed to convert hfile index members to KeyValues. 682 */ 683 public static KeyValue createKeyValueFromKey(final byte[] b) { 684 return createKeyValueFromKey(b, 0, b.length); 685 } 686 687 /** 688 * @param bb 689 * @return A KeyValue made of a byte buffer that holds the key-only part. 690 * Needed to convert hfile index members to KeyValues. 691 */ 692 public static KeyValue createKeyValueFromKey(final ByteBuffer bb) { 693 return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit()); 694 } 695 696 /** 697 * @param b 698 * @param o 699 * @param l 700 * @return A KeyValue made of a byte array that holds the key-only part. 701 * Needed to convert hfile index members to KeyValues. 702 */ 703 public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) { 704 byte[] newb = new byte[l + KeyValue.ROW_OFFSET]; 705 System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l); 706 Bytes.putInt(newb, 0, l); 707 Bytes.putInt(newb, Bytes.SIZEOF_INT, 0); 708 return new KeyValue(newb); 709 } 710 711 /** 712 * @param in 713 * Where to read bytes from. Creates a byte array to hold the 714 * KeyValue backing bytes copied from the steam. 715 * @return KeyValue created by deserializing from <code>in</code> OR if we 716 * find a length of zero, we will return null which can be useful 717 * marking a stream as done. 718 * @throws IOException 719 */ 720 public static KeyValue create(final DataInput in) throws IOException { 721 return create(in.readInt(), in); 722 } 723 724 /** 725 * Create a KeyValue reading <code>length</code> from <code>in</code> 726 * 727 * @param length 728 * @param in 729 * @return Created KeyValue OR if we find a length of zero, we will return 730 * null which can be useful marking a stream as done. 731 * @throws IOException 732 */ 733 public static KeyValue create(int length, final DataInput in) throws IOException { 734 735 if (length <= 0) { 736 if (length == 0) 737 return null; 738 throw new IOException("Failed read " + length + " bytes, stream corrupt?"); 739 } 740 741 // This is how the old Writables.readFrom used to deserialize. Didn't even 742 // vint. 743 byte[] bytes = new byte[length]; 744 in.readFully(bytes); 745 return new KeyValue(bytes, 0, length); 746 } 747 748 public static int getSerializedSize(Cell cell, boolean withTags) { 749 if (cell instanceof ExtendedCell) { 750 return ((ExtendedCell) cell).getSerializedSize(withTags); 751 } 752 return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(), 753 cell.getValueLength(), cell.getTagsLength(), withTags); 754 } 755 756 public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags) 757 throws IOException { 758 if (cell instanceof ExtendedCell) { 759 return ((ExtendedCell)cell).write(out, withTags); 760 } else { 761 short rlen = cell.getRowLength(); 762 byte flen = cell.getFamilyLength(); 763 int qlen = cell.getQualifierLength(); 764 int vlen = cell.getValueLength(); 765 int tlen = cell.getTagsLength(); 766 int size = 0; 767 // write key length 768 int klen = keyLength(rlen, flen, qlen); 769 ByteBufferUtils.putInt(out, klen); 770 // write value length 771 ByteBufferUtils.putInt(out, vlen); 772 // Write rowkey - 2 bytes rk length followed by rowkey bytes 773 StreamUtils.writeShort(out, rlen); 774 out.write(cell.getRowArray(), cell.getRowOffset(), rlen); 775 // Write cf - 1 byte of cf length followed by the family bytes 776 out.write(flen); 777 out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen); 778 // write qualifier 779 out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen); 780 // write timestamp 781 StreamUtils.writeLong(out, cell.getTimestamp()); 782 // write the type 783 out.write(cell.getTypeByte()); 784 // write value 785 out.write(cell.getValueArray(), cell.getValueOffset(), vlen); 786 size = klen + vlen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE; 787 // write tags if we have to 788 if (withTags && tlen > 0) { 789 // 2 bytes tags length followed by tags bytes 790 // tags length is serialized with 2 bytes only(short way) even if the 791 // type is int. As this 792 // is non -ve numbers, we save the sign bit. See HBASE-11437 793 out.write((byte) (0xff & (tlen >> 8))); 794 out.write((byte) (0xff & tlen)); 795 out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen); 796 size += tlen + KeyValue.TAGS_LENGTH_SIZE; 797 } 798 return size; 799 } 800 } 801}