001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.io.hfile; 020 021import java.io.DataOutput; 022import java.io.DataOutputStream; 023import java.io.IOException; 024import java.net.InetSocketAddress; 025import java.nio.ByteBuffer; 026import java.util.ArrayList; 027import java.util.List; 028 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataOutputStream; 031import org.apache.hadoop.fs.FileSystem; 032import org.apache.hadoop.fs.Path; 033import org.apache.hadoop.fs.permission.FsPermission; 034import org.apache.hadoop.hbase.ByteBufferExtendedCell; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.CellComparator; 037import org.apache.hadoop.hbase.CellUtil; 038import org.apache.hadoop.hbase.HConstants; 039import org.apache.hadoop.hbase.PrivateCellUtil; 040import org.apache.hadoop.hbase.KeyValueUtil; 041import org.apache.hadoop.hbase.CellComparatorImpl.MetaCellComparator; 042import org.apache.yetus.audience.InterfaceAudience; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045import org.apache.hadoop.hbase.io.compress.Compression; 046import org.apache.hadoop.hbase.io.crypto.Encryption; 047import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 048import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; 049import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable; 050import org.apache.hadoop.hbase.security.EncryptionUtil; 051import org.apache.hadoop.hbase.security.User; 052import org.apache.hadoop.hbase.util.BloomFilterWriter; 053import org.apache.hadoop.hbase.util.ByteBufferUtils; 054import org.apache.hadoop.hbase.util.Bytes; 055import org.apache.hadoop.hbase.util.FSUtils; 056import org.apache.hadoop.io.Writable; 057 058/** 059 * Common functionality needed by all versions of {@link HFile} writers. 060 */ 061@InterfaceAudience.Private 062public class HFileWriterImpl implements HFile.Writer { 063 private static final Logger LOG = LoggerFactory.getLogger(HFileWriterImpl.class); 064 065 private static final long UNSET = -1; 066 067 /** if this feature is enabled, preCalculate encoded data size before real encoding happens*/ 068 public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO = "hbase.writer.unified.encoded.blocksize.ratio"; 069 070 /** Block size limit after encoding, used to unify encoded block Cache entry size*/ 071 private final int encodedBlockSizeLimit; 072 073 /** The Cell previously appended. Becomes the last cell in the file.*/ 074 protected Cell lastCell = null; 075 076 /** FileSystem stream to write into. */ 077 protected FSDataOutputStream outputStream; 078 079 /** True if we opened the <code>outputStream</code> (and so will close it). */ 080 protected final boolean closeOutputStream; 081 082 /** A "file info" block: a key-value map of file-wide metadata. */ 083 protected FileInfo fileInfo = new HFile.FileInfo(); 084 085 /** Total # of key/value entries, i.e. how many times add() was called. */ 086 protected long entryCount = 0; 087 088 /** Used for calculating the average key length. */ 089 protected long totalKeyLength = 0; 090 091 /** Used for calculating the average value length. */ 092 protected long totalValueLength = 0; 093 094 /** Total uncompressed bytes, maybe calculate a compression ratio later. */ 095 protected long totalUncompressedBytes = 0; 096 097 /** Key comparator. Used to ensure we write in order. */ 098 protected final CellComparator comparator; 099 100 /** Meta block names. */ 101 protected List<byte[]> metaNames = new ArrayList<>(); 102 103 /** {@link Writable}s representing meta block data. */ 104 protected List<Writable> metaData = new ArrayList<>(); 105 106 /** 107 * First cell in a block. 108 * This reference should be short-lived since we write hfiles in a burst. 109 */ 110 protected Cell firstCellInBlock = null; 111 112 113 /** May be null if we were passed a stream. */ 114 protected final Path path; 115 116 /** Cache configuration for caching data on write. */ 117 protected final CacheConfig cacheConf; 118 119 /** 120 * Name for this object used when logging or in toString. Is either 121 * the result of a toString on stream or else name of passed file Path. 122 */ 123 protected final String name; 124 125 /** 126 * The data block encoding which will be used. 127 * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding. 128 */ 129 protected final HFileDataBlockEncoder blockEncoder; 130 131 protected final HFileContext hFileContext; 132 133 private int maxTagsLength = 0; 134 135 /** KeyValue version in FileInfo */ 136 public static final byte [] KEY_VALUE_VERSION = Bytes.toBytes("KEY_VALUE_VERSION"); 137 138 /** Version for KeyValue which includes memstore timestamp */ 139 public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1; 140 141 /** Inline block writers for multi-level block index and compound Blooms. */ 142 private List<InlineBlockWriter> inlineBlockWriters = new ArrayList<>(); 143 144 /** block writer */ 145 protected HFileBlock.Writer blockWriter; 146 147 private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter; 148 private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter; 149 150 /** The offset of the first data block or -1 if the file is empty. */ 151 private long firstDataBlockOffset = UNSET; 152 153 /** The offset of the last data block or 0 if the file is empty. */ 154 protected long lastDataBlockOffset = UNSET; 155 156 /** 157 * The last(stop) Cell of the previous data block. 158 * This reference should be short-lived since we write hfiles in a burst. 159 */ 160 private Cell lastCellOfPreviousBlock = null; 161 162 /** Additional data items to be written to the "load-on-open" section. */ 163 private List<BlockWritable> additionalLoadOnOpenData = new ArrayList<>(); 164 165 protected long maxMemstoreTS = 0; 166 167 public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path, 168 FSDataOutputStream outputStream, 169 CellComparator comparator, HFileContext fileContext) { 170 this.outputStream = outputStream; 171 this.path = path; 172 this.name = path != null ? path.getName() : outputStream.toString(); 173 this.hFileContext = fileContext; 174 DataBlockEncoding encoding = hFileContext.getDataBlockEncoding(); 175 if (encoding != DataBlockEncoding.NONE) { 176 this.blockEncoder = new HFileDataBlockEncoderImpl(encoding); 177 } else { 178 this.blockEncoder = NoOpDataBlockEncoder.INSTANCE; 179 } 180 this.comparator = comparator != null ? comparator : CellComparator.getInstance(); 181 182 closeOutputStream = path != null; 183 this.cacheConf = cacheConf; 184 float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f); 185 this.encodedBlockSizeLimit = (int)(hFileContext.getBlocksize() * encodeBlockSizeRatio); 186 finishInit(conf); 187 if (LOG.isTraceEnabled()) { 188 LOG.trace("Writer" + (path != null ? " for " + path : "") + 189 " initialized with cacheConf: " + cacheConf + 190 " comparator: " + comparator.getClass().getSimpleName() + 191 " fileContext: " + fileContext); 192 } 193 } 194 195 /** 196 * Add to the file info. All added key/value pairs can be obtained using 197 * {@link HFile.Reader#loadFileInfo()}. 198 * 199 * @param k Key 200 * @param v Value 201 * @throws IOException in case the key or the value are invalid 202 */ 203 @Override 204 public void appendFileInfo(final byte[] k, final byte[] v) 205 throws IOException { 206 fileInfo.append(k, v, true); 207 } 208 209 /** 210 * Sets the file info offset in the trailer, finishes up populating fields in 211 * the file info, and writes the file info into the given data output. The 212 * reason the data output is not always {@link #outputStream} is that we store 213 * file info as a block in version 2. 214 * 215 * @param trailer fixed file trailer 216 * @param out the data output to write the file info to 217 * @throws IOException 218 */ 219 protected final void writeFileInfo(FixedFileTrailer trailer, DataOutputStream out) 220 throws IOException { 221 trailer.setFileInfoOffset(outputStream.getPos()); 222 finishFileInfo(); 223 long startTime = System.currentTimeMillis(); 224 fileInfo.write(out); 225 HFile.updateWriteLatency(System.currentTimeMillis() - startTime); 226 } 227 228 /** 229 * Checks that the given Cell's key does not violate the key order. 230 * 231 * @param cell Cell whose key to check. 232 * @return true if the key is duplicate 233 * @throws IOException if the key or the key order is wrong 234 */ 235 protected boolean checkKey(final Cell cell) throws IOException { 236 boolean isDuplicateKey = false; 237 238 if (cell == null) { 239 throw new IOException("Key cannot be null or empty"); 240 } 241 if (lastCell != null) { 242 int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(comparator, lastCell, cell); 243 244 if (keyComp > 0) { 245 throw new IOException("Added a key not lexically larger than" 246 + " previous. Current cell = " + cell + ", lastCell = " + lastCell); 247 } else if (keyComp == 0) { 248 isDuplicateKey = true; 249 } 250 } 251 return isDuplicateKey; 252 } 253 254 /** Checks the given value for validity. */ 255 protected void checkValue(final byte[] value, final int offset, 256 final int length) throws IOException { 257 if (value == null) { 258 throw new IOException("Value cannot be null"); 259 } 260 } 261 262 /** 263 * @return Path or null if we were passed a stream rather than a Path. 264 */ 265 @Override 266 public Path getPath() { 267 return path; 268 } 269 270 @Override 271 public String toString() { 272 return "writer=" + (path != null ? path.toString() : null) + ", name=" 273 + name + ", compression=" + hFileContext.getCompression().getName(); 274 } 275 276 public static Compression.Algorithm compressionByName(String algoName) { 277 if (algoName == null) 278 return HFile.DEFAULT_COMPRESSION_ALGORITHM; 279 return Compression.getCompressionAlgorithmByName(algoName); 280 } 281 282 /** A helper method to create HFile output streams in constructors */ 283 protected static FSDataOutputStream createOutputStream(Configuration conf, 284 FileSystem fs, Path path, InetSocketAddress[] favoredNodes) throws IOException { 285 FsPermission perms = FSUtils.getFilePermissions(fs, conf, 286 HConstants.DATA_FILE_UMASK_KEY); 287 return FSUtils.create(conf, fs, path, perms, favoredNodes); 288 } 289 290 /** Additional initialization steps */ 291 protected void finishInit(final Configuration conf) { 292 if (blockWriter != null) { 293 throw new IllegalStateException("finishInit called twice"); 294 } 295 296 blockWriter = new HFileBlock.Writer(blockEncoder, hFileContext); 297 298 // Data block index writer 299 boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite(); 300 dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(blockWriter, 301 cacheIndexesOnWrite ? cacheConf : null, 302 cacheIndexesOnWrite ? name : null); 303 dataBlockIndexWriter.setMaxChunkSize( 304 HFileBlockIndex.getMaxChunkSize(conf)); 305 dataBlockIndexWriter.setMinIndexNumEntries( 306 HFileBlockIndex.getMinIndexNumEntries(conf)); 307 inlineBlockWriters.add(dataBlockIndexWriter); 308 309 // Meta data block index writer 310 metaBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(); 311 if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf); 312 } 313 314 /** 315 * At a block boundary, write all the inline blocks and opens new block. 316 * 317 * @throws IOException 318 */ 319 protected void checkBlockBoundary() throws IOException { 320 //for encoder like prefixTree, encoded size is not available, so we have to compare both encoded size 321 //and unencoded size to blocksize limit. 322 if (blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit 323 || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()) { 324 finishBlock(); 325 writeInlineBlocks(false); 326 newBlock(); 327 } 328 } 329 330 /** Clean up the data block that is currently being written.*/ 331 private void finishBlock() throws IOException { 332 if (!blockWriter.isWriting() || blockWriter.blockSizeWritten() == 0) return; 333 334 // Update the first data block offset if UNSET; used scanning. 335 if (firstDataBlockOffset == UNSET) { 336 firstDataBlockOffset = outputStream.getPos(); 337 } 338 // Update the last data block offset each time through here. 339 lastDataBlockOffset = outputStream.getPos(); 340 blockWriter.writeHeaderAndData(outputStream); 341 int onDiskSize = blockWriter.getOnDiskSizeWithHeader(); 342 Cell indexEntry = 343 getMidpoint(this.comparator, lastCellOfPreviousBlock, firstCellInBlock); 344 dataBlockIndexWriter.addEntry(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(indexEntry), 345 lastDataBlockOffset, onDiskSize); 346 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 347 if (cacheConf.shouldCacheDataOnWrite()) { 348 doCacheOnWrite(lastDataBlockOffset); 349 } 350 } 351 352 /** 353 * Try to return a Cell that falls between <code>left</code> and 354 * <code>right</code> but that is shorter; i.e. takes up less space. This 355 * trick is used building HFile block index. Its an optimization. It does not 356 * always work. In this case we'll just return the <code>right</code> cell. 357 * 358 * @param comparator 359 * Comparator to use. 360 * @param left 361 * @param right 362 * @return A cell that sorts between <code>left</code> and <code>right</code>. 363 */ 364 public static Cell getMidpoint(final CellComparator comparator, final Cell left, 365 final Cell right) { 366 // TODO: Redo so only a single pass over the arrays rather than one to 367 // compare and then a second composing midpoint. 368 if (right == null) { 369 throw new IllegalArgumentException("right cell can not be null"); 370 } 371 if (left == null) { 372 return right; 373 } 374 // If Cells from meta table, don't mess around. meta table Cells have schema 375 // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip 376 // out without trying to do this optimization. 377 if (comparator instanceof MetaCellComparator) { 378 return right; 379 } 380 int diff = comparator.compareRows(left, right); 381 if (diff > 0) { 382 throw new IllegalArgumentException("Left row sorts after right row; left=" 383 + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 384 } 385 byte[] midRow; 386 boolean bufferBacked = left instanceof ByteBufferExtendedCell 387 && right instanceof ByteBufferExtendedCell; 388 if (diff < 0) { 389 // Left row is < right row. 390 if (bufferBacked) { 391 midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getRowByteBuffer(), 392 ((ByteBufferExtendedCell) left).getRowPosition(), left.getRowLength(), 393 ((ByteBufferExtendedCell) right).getRowByteBuffer(), 394 ((ByteBufferExtendedCell) right).getRowPosition(), right.getRowLength()); 395 } else { 396 midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(), 397 left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength()); 398 } 399 // If midRow is null, just return 'right'. Can't do optimization. 400 if (midRow == null) return right; 401 return PrivateCellUtil.createFirstOnRow(midRow); 402 } 403 // Rows are same. Compare on families. 404 diff = comparator.compareFamilies(left, right); 405 if (diff > 0) { 406 throw new IllegalArgumentException("Left family sorts after right family; left=" 407 + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 408 } 409 if (diff < 0) { 410 if (bufferBacked) { 411 midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getFamilyByteBuffer(), 412 ((ByteBufferExtendedCell) left).getFamilyPosition(), left.getFamilyLength(), 413 ((ByteBufferExtendedCell) right).getFamilyByteBuffer(), 414 ((ByteBufferExtendedCell) right).getFamilyPosition(), right.getFamilyLength()); 415 } else { 416 midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(), 417 left.getFamilyLength(), right.getFamilyArray(), right.getFamilyOffset(), 418 right.getFamilyLength()); 419 } 420 // If midRow is null, just return 'right'. Can't do optimization. 421 if (midRow == null) return right; 422 // Return new Cell where we use right row and then a mid sort family. 423 return PrivateCellUtil.createFirstOnRowFamily(right, midRow, 0, midRow.length); 424 } 425 // Families are same. Compare on qualifiers. 426 diff = comparator.compareQualifiers(left, right); 427 if (diff > 0) { 428 throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left=" 429 + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); 430 } 431 if (diff < 0) { 432 if (bufferBacked) { 433 midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getQualifierByteBuffer(), 434 ((ByteBufferExtendedCell) left).getQualifierPosition(), left.getQualifierLength(), 435 ((ByteBufferExtendedCell) right).getQualifierByteBuffer(), 436 ((ByteBufferExtendedCell) right).getQualifierPosition(), right.getQualifierLength()); 437 } else { 438 midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(), 439 left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(), 440 right.getQualifierLength()); 441 } 442 // If midRow is null, just return 'right'. Can't do optimization. 443 if (midRow == null) return right; 444 // Return new Cell where we use right row and family and then a mid sort qualifier. 445 return PrivateCellUtil.createFirstOnRowCol(right, midRow, 0, midRow.length); 446 } 447 // No opportunity for optimization. Just return right key. 448 return right; 449 } 450 451 /** 452 * @param leftArray 453 * @param leftOffset 454 * @param leftLength 455 * @param rightArray 456 * @param rightOffset 457 * @param rightLength 458 * @return Return a new array that is between left and right and minimally 459 * sized else just return null as indicator that we could not create a 460 * mid point. 461 */ 462 private static byte[] getMinimumMidpointArray(final byte[] leftArray, final int leftOffset, 463 final int leftLength, final byte[] rightArray, final int rightOffset, final int rightLength) { 464 // rows are different 465 int minLength = leftLength < rightLength ? leftLength : rightLength; 466 int diffIdx = 0; 467 while (diffIdx < minLength 468 && leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) { 469 diffIdx++; 470 } 471 byte[] minimumMidpointArray = null; 472 if (diffIdx >= minLength) { 473 // leftKey's row is prefix of rightKey's. 474 minimumMidpointArray = new byte[diffIdx + 1]; 475 System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); 476 } else { 477 int diffByte = leftArray[leftOffset + diffIdx]; 478 if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) { 479 minimumMidpointArray = new byte[diffIdx + 1]; 480 System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx); 481 minimumMidpointArray[diffIdx] = (byte) (diffByte + 1); 482 } else { 483 minimumMidpointArray = new byte[diffIdx + 1]; 484 System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); 485 } 486 } 487 return minimumMidpointArray; 488 } 489 490 private static byte[] getMinimumMidpointArray(ByteBuffer left, int leftOffset, int leftLength, 491 ByteBuffer right, int rightOffset, int rightLength) { 492 // rows are different 493 int minLength = leftLength < rightLength ? leftLength : rightLength; 494 int diffIdx = 0; 495 while (diffIdx < minLength && ByteBufferUtils.toByte(left, 496 leftOffset + diffIdx) == ByteBufferUtils.toByte(right, rightOffset + diffIdx)) { 497 diffIdx++; 498 } 499 byte[] minMidpoint = null; 500 if (diffIdx >= minLength) { 501 // leftKey's row is prefix of rightKey's. 502 minMidpoint = new byte[diffIdx + 1]; 503 ByteBufferUtils.copyFromBufferToArray(minMidpoint, right, rightOffset, 0, diffIdx + 1); 504 } else { 505 int diffByte = ByteBufferUtils.toByte(left, leftOffset + diffIdx); 506 if ((0xff & diffByte) < 0xff 507 && (diffByte + 1) < (ByteBufferUtils.toByte(right, rightOffset + diffIdx) & 0xff)) { 508 minMidpoint = new byte[diffIdx + 1]; 509 ByteBufferUtils.copyFromBufferToArray(minMidpoint, left, leftOffset, 0, diffIdx); 510 minMidpoint[diffIdx] = (byte) (diffByte + 1); 511 } else { 512 minMidpoint = new byte[diffIdx + 1]; 513 ByteBufferUtils.copyFromBufferToArray(minMidpoint, right, rightOffset, 0, diffIdx + 1); 514 } 515 } 516 return minMidpoint; 517 } 518 519 /** Gives inline block writers an opportunity to contribute blocks. */ 520 private void writeInlineBlocks(boolean closing) throws IOException { 521 for (InlineBlockWriter ibw : inlineBlockWriters) { 522 while (ibw.shouldWriteBlock(closing)) { 523 long offset = outputStream.getPos(); 524 boolean cacheThisBlock = ibw.getCacheOnWrite(); 525 ibw.writeInlineBlock(blockWriter.startWriting( 526 ibw.getInlineBlockType())); 527 blockWriter.writeHeaderAndData(outputStream); 528 ibw.blockWritten(offset, blockWriter.getOnDiskSizeWithHeader(), 529 blockWriter.getUncompressedSizeWithoutHeader()); 530 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 531 532 if (cacheThisBlock) { 533 doCacheOnWrite(offset); 534 } 535 } 536 } 537 } 538 539 /** 540 * Caches the last written HFile block. 541 * @param offset the offset of the block we want to cache. Used to determine 542 * the cache key. 543 */ 544 private void doCacheOnWrite(long offset) { 545 HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf); 546 cacheConf.getBlockCache().cacheBlock( 547 new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()), 548 cacheFormatBlock); 549 } 550 551 /** 552 * Ready a new block for writing. 553 * 554 * @throws IOException 555 */ 556 protected void newBlock() throws IOException { 557 // This is where the next block begins. 558 blockWriter.startWriting(BlockType.DATA); 559 firstCellInBlock = null; 560 if (lastCell != null) { 561 lastCellOfPreviousBlock = lastCell; 562 } 563 } 564 565 /** 566 * Add a meta block to the end of the file. Call before close(). Metadata 567 * blocks are expensive. Fill one with a bunch of serialized data rather than 568 * do a metadata block per metadata instance. If metadata is small, consider 569 * adding to file info using {@link #appendFileInfo(byte[], byte[])} 570 * 571 * @param metaBlockName 572 * name of the block 573 * @param content 574 * will call readFields to get data later (DO NOT REUSE) 575 */ 576 @Override 577 public void appendMetaBlock(String metaBlockName, Writable content) { 578 byte[] key = Bytes.toBytes(metaBlockName); 579 int i; 580 for (i = 0; i < metaNames.size(); ++i) { 581 // stop when the current key is greater than our own 582 byte[] cur = metaNames.get(i); 583 if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length, key, 0, 584 key.length) > 0) { 585 break; 586 } 587 } 588 metaNames.add(i, key); 589 metaData.add(i, content); 590 } 591 592 @Override 593 public void close() throws IOException { 594 if (outputStream == null) { 595 return; 596 } 597 // Save data block encoder metadata in the file info. 598 blockEncoder.saveMetadata(this); 599 // Write out the end of the data blocks, then write meta data blocks. 600 // followed by fileinfo, data block index and meta block index. 601 602 finishBlock(); 603 writeInlineBlocks(true); 604 605 FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion()); 606 607 // Write out the metadata blocks if any. 608 if (!metaNames.isEmpty()) { 609 for (int i = 0; i < metaNames.size(); ++i) { 610 // store the beginning offset 611 long offset = outputStream.getPos(); 612 // write the metadata content 613 DataOutputStream dos = blockWriter.startWriting(BlockType.META); 614 metaData.get(i).write(dos); 615 616 blockWriter.writeHeaderAndData(outputStream); 617 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 618 619 // Add the new meta block to the meta index. 620 metaBlockIndexWriter.addEntry(metaNames.get(i), offset, 621 blockWriter.getOnDiskSizeWithHeader()); 622 } 623 } 624 625 // Load-on-open section. 626 627 // Data block index. 628 // 629 // In version 2, this section of the file starts with the root level data 630 // block index. We call a function that writes intermediate-level blocks 631 // first, then root level, and returns the offset of the root level block 632 // index. 633 634 long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream); 635 trailer.setLoadOnOpenOffset(rootIndexOffset); 636 637 // Meta block index. 638 metaBlockIndexWriter.writeSingleLevelIndex(blockWriter.startWriting( 639 BlockType.ROOT_INDEX), "meta"); 640 blockWriter.writeHeaderAndData(outputStream); 641 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 642 643 if (this.hFileContext.isIncludesMvcc()) { 644 appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS)); 645 appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE)); 646 } 647 648 // File info 649 writeFileInfo(trailer, blockWriter.startWriting(BlockType.FILE_INFO)); 650 blockWriter.writeHeaderAndData(outputStream); 651 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 652 653 // Load-on-open data supplied by higher levels, e.g. Bloom filters. 654 for (BlockWritable w : additionalLoadOnOpenData){ 655 blockWriter.writeBlock(w, outputStream); 656 totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader(); 657 } 658 659 // Now finish off the trailer. 660 trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels()); 661 trailer.setUncompressedDataIndexSize( 662 dataBlockIndexWriter.getTotalUncompressedSize()); 663 trailer.setFirstDataBlockOffset(firstDataBlockOffset); 664 trailer.setLastDataBlockOffset(lastDataBlockOffset); 665 trailer.setComparatorClass(comparator.getClass()); 666 trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries()); 667 668 669 finishClose(trailer); 670 671 blockWriter.release(); 672 } 673 674 @Override 675 public void addInlineBlockWriter(InlineBlockWriter ibw) { 676 inlineBlockWriters.add(ibw); 677 } 678 679 @Override 680 public void addGeneralBloomFilter(final BloomFilterWriter bfw) { 681 this.addBloomFilter(bfw, BlockType.GENERAL_BLOOM_META); 682 } 683 684 @Override 685 public void addDeleteFamilyBloomFilter(final BloomFilterWriter bfw) { 686 this.addBloomFilter(bfw, BlockType.DELETE_FAMILY_BLOOM_META); 687 } 688 689 private void addBloomFilter(final BloomFilterWriter bfw, 690 final BlockType blockType) { 691 if (bfw.getKeyCount() <= 0) 692 return; 693 694 if (blockType != BlockType.GENERAL_BLOOM_META && 695 blockType != BlockType.DELETE_FAMILY_BLOOM_META) { 696 throw new RuntimeException("Block Type: " + blockType.toString() + 697 "is not supported"); 698 } 699 additionalLoadOnOpenData.add(new BlockWritable() { 700 @Override 701 public BlockType getBlockType() { 702 return blockType; 703 } 704 705 @Override 706 public void writeToBlock(DataOutput out) throws IOException { 707 bfw.getMetaWriter().write(out); 708 Writable dataWriter = bfw.getDataWriter(); 709 if (dataWriter != null) 710 dataWriter.write(out); 711 } 712 }); 713 } 714 715 @Override 716 public HFileContext getFileContext() { 717 return hFileContext; 718 } 719 720 /** 721 * Add key/value to file. Keys must be added in an order that agrees with the 722 * Comparator passed on construction. 723 * 724 * @param cell 725 * Cell to add. Cannot be empty nor null. 726 * @throws IOException 727 */ 728 @Override 729 public void append(final Cell cell) throws IOException { 730 // checkKey uses comparator to check we are writing in order. 731 boolean dupKey = checkKey(cell); 732 if (!dupKey) { 733 checkBlockBoundary(); 734 } 735 736 if (!blockWriter.isWriting()) { 737 newBlock(); 738 } 739 740 blockWriter.write(cell); 741 742 totalKeyLength += PrivateCellUtil.estimatedSerializedSizeOfKey(cell); 743 totalValueLength += cell.getValueLength(); 744 745 // Are we the first key in this block? 746 if (firstCellInBlock == null) { 747 // If cell is big, block will be closed and this firstCellInBlock reference will only last 748 // a short while. 749 firstCellInBlock = cell; 750 } 751 752 // TODO: What if cell is 10MB and we write infrequently? We hold on to cell here indefinitely? 753 lastCell = cell; 754 entryCount++; 755 this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId()); 756 int tagsLength = cell.getTagsLength(); 757 if (tagsLength > this.maxTagsLength) { 758 this.maxTagsLength = tagsLength; 759 } 760 } 761 762 @Override 763 public void beforeShipped() throws IOException { 764 // Add clone methods for every cell 765 if (this.lastCell != null) { 766 this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell); 767 } 768 if (this.firstCellInBlock != null) { 769 this.firstCellInBlock = KeyValueUtil.toNewKeyCell(this.firstCellInBlock); 770 } 771 if (this.lastCellOfPreviousBlock != null) { 772 this.lastCellOfPreviousBlock = KeyValueUtil.toNewKeyCell(this.lastCellOfPreviousBlock); 773 } 774 } 775 776 protected void finishFileInfo() throws IOException { 777 if (lastCell != null) { 778 // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean 779 // byte buffer. Won't take a tuple. 780 byte [] lastKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell); 781 fileInfo.append(FileInfo.LASTKEY, lastKey, false); 782 } 783 784 // Average key length. 785 int avgKeyLen = 786 entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount); 787 fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false); 788 fileInfo.append(FileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()), 789 false); 790 791 // Average value length. 792 int avgValueLen = 793 entryCount == 0 ? 0 : (int) (totalValueLength / entryCount); 794 fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false); 795 if (hFileContext.isIncludesTags()) { 796 // When tags are not being written in this file, MAX_TAGS_LEN is excluded 797 // from the FileInfo 798 fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false); 799 boolean tagsCompressed = (hFileContext.getDataBlockEncoding() != DataBlockEncoding.NONE) 800 && hFileContext.isCompressTags(); 801 fileInfo.append(FileInfo.TAGS_COMPRESSED, Bytes.toBytes(tagsCompressed), false); 802 } 803 } 804 805 protected int getMajorVersion() { 806 return 3; 807 } 808 809 protected int getMinorVersion() { 810 return HFileReaderImpl.MAX_MINOR_VERSION; 811 } 812 813 protected void finishClose(FixedFileTrailer trailer) throws IOException { 814 // Write out encryption metadata before finalizing if we have a valid crypto context 815 Encryption.Context cryptoContext = hFileContext.getEncryptionContext(); 816 if (cryptoContext != Encryption.Context.NONE) { 817 // Wrap the context's key and write it as the encryption metadata, the wrapper includes 818 // all information needed for decryption 819 trailer.setEncryptionKey(EncryptionUtil.wrapKey(cryptoContext.getConf(), 820 cryptoContext.getConf().get(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, 821 User.getCurrent().getShortName()), 822 cryptoContext.getKey())); 823 } 824 // Now we can finish the close 825 trailer.setMetaIndexCount(metaNames.size()); 826 trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize()); 827 trailer.setEntryCount(entryCount); 828 trailer.setCompressionCodec(hFileContext.getCompression()); 829 830 long startTime = System.currentTimeMillis(); 831 trailer.serialize(outputStream); 832 HFile.updateWriteLatency(System.currentTimeMillis() - startTime); 833 834 if (closeOutputStream) { 835 outputStream.close(); 836 outputStream = null; 837 } 838 } 839}