001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io.hfile;
020
021import java.io.DataOutput;
022import java.io.DataOutputStream;
023import java.io.IOException;
024import java.net.InetSocketAddress;
025import java.nio.ByteBuffer;
026import java.util.ArrayList;
027import java.util.List;
028
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataOutputStream;
031import org.apache.hadoop.fs.FileSystem;
032import org.apache.hadoop.fs.Path;
033import org.apache.hadoop.fs.permission.FsPermission;
034import org.apache.hadoop.hbase.ByteBufferExtendedCell;
035import org.apache.hadoop.hbase.Cell;
036import org.apache.hadoop.hbase.CellComparator;
037import org.apache.hadoop.hbase.CellUtil;
038import org.apache.hadoop.hbase.HConstants;
039import org.apache.hadoop.hbase.PrivateCellUtil;
040import org.apache.hadoop.hbase.KeyValueUtil;
041import org.apache.hadoop.hbase.CellComparatorImpl.MetaCellComparator;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045import org.apache.hadoop.hbase.io.compress.Compression;
046import org.apache.hadoop.hbase.io.crypto.Encryption;
047import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
048import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
049import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
050import org.apache.hadoop.hbase.security.EncryptionUtil;
051import org.apache.hadoop.hbase.security.User;
052import org.apache.hadoop.hbase.util.BloomFilterWriter;
053import org.apache.hadoop.hbase.util.ByteBufferUtils;
054import org.apache.hadoop.hbase.util.Bytes;
055import org.apache.hadoop.hbase.util.FSUtils;
056import org.apache.hadoop.io.Writable;
057
058/**
059 * Common functionality needed by all versions of {@link HFile} writers.
060 */
061@InterfaceAudience.Private
062public class HFileWriterImpl implements HFile.Writer {
063  private static final Logger LOG = LoggerFactory.getLogger(HFileWriterImpl.class);
064
065  private static final long UNSET = -1;
066
067  /** If this feature is enabled, pre-calculate the encoded data size before the real encoding happens. */
068  public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO = "hbase.writer.unified.encoded.blocksize.ratio";
069
070  /** Block size limit after encoding, used to unify encoded block Cache entry size*/
071  private final int encodedBlockSizeLimit;
072
073  /** The Cell previously appended. Becomes the last cell in the file.*/
074  protected Cell lastCell = null;
075
076  /** FileSystem stream to write into. */
077  protected FSDataOutputStream outputStream;
078
079  /** True if we opened the <code>outputStream</code> (and so will close it). */
080  protected final boolean closeOutputStream;
081
082  /** A "file info" block: a key-value map of file-wide metadata. */
083  protected FileInfo fileInfo = new HFile.FileInfo();
084
085  /** Total # of key/value entries, i.e. how many times append() was called. */
086  protected long entryCount = 0;
087
088  /** Used for calculating the average key length. */
089  protected long totalKeyLength = 0;
090
091  /** Used for calculating the average value length. */
092  protected long totalValueLength = 0;
093
094  /** Total uncompressed bytes, maybe calculate a compression ratio later. */
095  protected long totalUncompressedBytes = 0;
096
097  /** Key comparator. Used to ensure we write in order. */
098  protected final CellComparator comparator;
099
100  /** Meta block names. */
101  protected List<byte[]> metaNames = new ArrayList<>();
102
103  /** {@link Writable}s representing meta block data. */
104  protected List<Writable> metaData = new ArrayList<>();
105
106  /**
107   * First cell in a block.
108   * This reference should be short-lived since we write hfiles in a burst.
109   */
110  protected Cell firstCellInBlock = null;
111
112
113  /** May be null if we were passed a stream. */
114  protected final Path path;
115
116  /** Cache configuration for caching data on write. */
117  protected final CacheConfig cacheConf;
118
119  /**
120   * Name for this object used when logging or in toString. Is either
121   * the result of a toString on stream or else name of passed file Path.
122   */
123  protected final String name;
124
125  /**
126   * The data block encoding which will be used.
127   * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
128   */
129  protected final HFileDataBlockEncoder blockEncoder;
130
131  protected final HFileContext hFileContext;
132
133  private int maxTagsLength = 0;
134
135  /** KeyValue version in FileInfo */
136  public static final byte [] KEY_VALUE_VERSION = Bytes.toBytes("KEY_VALUE_VERSION");
137
138  /** Version for KeyValue which includes memstore timestamp */
139  public static final int KEY_VALUE_VER_WITH_MEMSTORE = 1;
140
141  /** Inline block writers for multi-level block index and compound Blooms. */
142  private List<InlineBlockWriter> inlineBlockWriters = new ArrayList<>();
143
144  /** block writer */
145  protected HFileBlock.Writer blockWriter;
146
147  private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter;
148  private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter;
149
150  /** The offset of the first data block or -1 if the file is empty. */
151  private long firstDataBlockOffset = UNSET;
152
153  /** The offset of the last data block or -1 if the file is empty. */
154  protected long lastDataBlockOffset = UNSET;
155
156  /**
157   * The last(stop) Cell of the previous data block.
158   * This reference should be short-lived since we write hfiles in a burst.
159   */
160  private Cell lastCellOfPreviousBlock = null;
161
162  /** Additional data items to be written to the "load-on-open" section. */
163  private List<BlockWritable> additionalLoadOnOpenData = new ArrayList<>();
164
165  protected long maxMemstoreTS = 0;
166
167  public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path,
168      FSDataOutputStream outputStream,
169      CellComparator comparator, HFileContext fileContext) {
170    this.outputStream = outputStream;
171    this.path = path;
172    this.name = path != null ? path.getName() : outputStream.toString();
173    this.hFileContext = fileContext;
174    DataBlockEncoding encoding = hFileContext.getDataBlockEncoding();
175    if (encoding != DataBlockEncoding.NONE) {
176      this.blockEncoder = new HFileDataBlockEncoderImpl(encoding);
177    } else {
178      this.blockEncoder = NoOpDataBlockEncoder.INSTANCE;
179    }
180    this.comparator = comparator != null ? comparator : CellComparator.getInstance();
181
182    closeOutputStream = path != null;
183    this.cacheConf = cacheConf;
184    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
185    this.encodedBlockSizeLimit = (int)(hFileContext.getBlocksize() * encodeBlockSizeRatio);
186    finishInit(conf);
187    if (LOG.isTraceEnabled()) {
188      LOG.trace("Writer" + (path != null ? " for " + path : "") +
189        " initialized with cacheConf: " + cacheConf +
190        " comparator: " + comparator.getClass().getSimpleName() +
191        " fileContext: " + fileContext);
192    }
193  }
194
195  /**
196   * Add to the file info. All added key/value pairs can be obtained using
197   * {@link HFile.Reader#loadFileInfo()}.
198   *
199   * @param k Key
200   * @param v Value
201   * @throws IOException in case the key or the value are invalid
202   */
203  @Override
204  public void appendFileInfo(final byte[] k, final byte[] v)
205      throws IOException {
206    fileInfo.append(k, v, true);
207  }
208
209  /**
210   * Sets the file info offset in the trailer, finishes up populating fields in
211   * the file info, and writes the file info into the given data output. The
212   * reason the data output is not always {@link #outputStream} is that we store
213   * file info as a block in version 2.
214   *
215   * @param trailer fixed file trailer
216   * @param out the data output to write the file info to
217   * @throws IOException
218   */
219  protected final void writeFileInfo(FixedFileTrailer trailer, DataOutputStream out)
220  throws IOException {
221    trailer.setFileInfoOffset(outputStream.getPos());
222    finishFileInfo();
223    long startTime = System.currentTimeMillis();
224    fileInfo.write(out);
225    HFile.updateWriteLatency(System.currentTimeMillis() - startTime);
226  }
227
228  /**
229   * Checks that the given Cell's key does not violate the key order.
230   *
231   * @param cell Cell whose key to check.
232   * @return true if the key is duplicate
233   * @throws IOException if the key or the key order is wrong
234   */
235  protected boolean checkKey(final Cell cell) throws IOException {
236    boolean isDuplicateKey = false;
237
238    if (cell == null) {
239      throw new IOException("Key cannot be null or empty");
240    }
241    if (lastCell != null) {
242      int keyComp = PrivateCellUtil.compareKeyIgnoresMvcc(comparator, lastCell, cell);
243
244      if (keyComp > 0) {
245        throw new IOException("Added a key not lexically larger than"
246            + " previous. Current cell = " + cell + ", lastCell = " + lastCell);
247      } else if (keyComp == 0) {
248        isDuplicateKey = true;
249      }
250    }
251    return isDuplicateKey;
252  }
253
254  /** Checks the given value for validity. */
255  protected void checkValue(final byte[] value, final int offset,
256      final int length) throws IOException {
257    if (value == null) {
258      throw new IOException("Value cannot be null");
259    }
260  }
261
262  /**
263   * @return Path or null if we were passed a stream rather than a Path.
264   */
265  @Override
266  public Path getPath() {
267    return path;
268  }
269
270  @Override
271  public String toString() {
272    return "writer=" + (path != null ? path.toString() : null) + ", name="
273        + name + ", compression=" + hFileContext.getCompression().getName();
274  }
275
276  public static Compression.Algorithm compressionByName(String algoName) {
277    if (algoName == null)
278      return HFile.DEFAULT_COMPRESSION_ALGORITHM;
279    return Compression.getCompressionAlgorithmByName(algoName);
280  }
281
282  /** A helper method to create HFile output streams in constructors */
283  protected static FSDataOutputStream createOutputStream(Configuration conf,
284      FileSystem fs, Path path, InetSocketAddress[] favoredNodes) throws IOException {
285    FsPermission perms = FSUtils.getFilePermissions(fs, conf,
286        HConstants.DATA_FILE_UMASK_KEY);
287    return FSUtils.create(conf, fs, path, perms, favoredNodes);
288  }
289
290  /** Additional initialization steps */
291  protected void finishInit(final Configuration conf) {
292    if (blockWriter != null) {
293      throw new IllegalStateException("finishInit called twice");
294    }
295
296    blockWriter = new HFileBlock.Writer(blockEncoder, hFileContext);
297
298    // Data block index writer
299    boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
300    dataBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter(blockWriter,
301        cacheIndexesOnWrite ? cacheConf : null,
302        cacheIndexesOnWrite ? name : null);
303    dataBlockIndexWriter.setMaxChunkSize(
304        HFileBlockIndex.getMaxChunkSize(conf));
305    dataBlockIndexWriter.setMinIndexNumEntries(
306        HFileBlockIndex.getMinIndexNumEntries(conf));
307    inlineBlockWriters.add(dataBlockIndexWriter);
308
309    // Meta data block index writer
310    metaBlockIndexWriter = new HFileBlockIndex.BlockIndexWriter();
311    if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf);
312  }
313
314  /**
315   * At a block boundary, write all the inline blocks and opens new block.
316   *
317   * @throws IOException
318   */
319  protected void checkBlockBoundary() throws IOException {
320    //for encoder like prefixTree, encoded size is not available, so we have to compare both encoded size
321    //and unencoded size to blocksize limit.
322    if (blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
323        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()) {
324      finishBlock();
325      writeInlineBlocks(false);
326      newBlock();
327    }
328  }
329
330  /** Clean up the data block that is currently being written.*/
331  private void finishBlock() throws IOException {
332    if (!blockWriter.isWriting() || blockWriter.blockSizeWritten() == 0) return;
333
334    // Update the first data block offset if UNSET; used scanning.
335    if (firstDataBlockOffset == UNSET) {
336      firstDataBlockOffset = outputStream.getPos();
337    }
338    // Update the last data block offset each time through here.
339    lastDataBlockOffset = outputStream.getPos();
340    blockWriter.writeHeaderAndData(outputStream);
341    int onDiskSize = blockWriter.getOnDiskSizeWithHeader();
342    Cell indexEntry =
343      getMidpoint(this.comparator, lastCellOfPreviousBlock, firstCellInBlock);
344    dataBlockIndexWriter.addEntry(PrivateCellUtil.getCellKeySerializedAsKeyValueKey(indexEntry),
345      lastDataBlockOffset, onDiskSize);
346    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
347    if (cacheConf.shouldCacheDataOnWrite()) {
348      doCacheOnWrite(lastDataBlockOffset);
349    }
350  }
351
352  /**
353   * Try to return a Cell that falls between <code>left</code> and
354   * <code>right</code> but that is shorter; i.e. takes up less space. This
355   * trick is used building HFile block index. Its an optimization. It does not
356   * always work. In this case we'll just return the <code>right</code> cell.
357   *
358   * @param comparator
359   *          Comparator to use.
360   * @param left
361   * @param right
362   * @return A cell that sorts between <code>left</code> and <code>right</code>.
363   */
364  public static Cell getMidpoint(final CellComparator comparator, final Cell left,
365      final Cell right) {
366    // TODO: Redo so only a single pass over the arrays rather than one to
367    // compare and then a second composing midpoint.
368    if (right == null) {
369      throw new IllegalArgumentException("right cell can not be null");
370    }
371    if (left == null) {
372      return right;
373    }
374    // If Cells from meta table, don't mess around. meta table Cells have schema
375    // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip
376    // out without trying to do this optimization.
377    if (comparator instanceof MetaCellComparator) {
378      return right;
379    }
380    int diff = comparator.compareRows(left, right);
381    if (diff > 0) {
382      throw new IllegalArgumentException("Left row sorts after right row; left="
383          + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
384    }
385    byte[] midRow;
386    boolean bufferBacked = left instanceof ByteBufferExtendedCell
387        && right instanceof ByteBufferExtendedCell;
388    if (diff < 0) {
389      // Left row is < right row.
390      if (bufferBacked) {
391        midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getRowByteBuffer(),
392            ((ByteBufferExtendedCell) left).getRowPosition(), left.getRowLength(),
393            ((ByteBufferExtendedCell) right).getRowByteBuffer(),
394            ((ByteBufferExtendedCell) right).getRowPosition(), right.getRowLength());
395      } else {
396        midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(),
397            left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength());
398      }
399      // If midRow is null, just return 'right'. Can't do optimization.
400      if (midRow == null) return right;
401      return PrivateCellUtil.createFirstOnRow(midRow);
402    }
403    // Rows are same. Compare on families.
404    diff = comparator.compareFamilies(left, right);
405    if (diff > 0) {
406      throw new IllegalArgumentException("Left family sorts after right family; left="
407          + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
408    }
409    if (diff < 0) {
410      if (bufferBacked) {
411        midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getFamilyByteBuffer(),
412            ((ByteBufferExtendedCell) left).getFamilyPosition(), left.getFamilyLength(),
413            ((ByteBufferExtendedCell) right).getFamilyByteBuffer(),
414            ((ByteBufferExtendedCell) right).getFamilyPosition(), right.getFamilyLength());
415      } else {
416        midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(),
417            left.getFamilyLength(), right.getFamilyArray(), right.getFamilyOffset(),
418            right.getFamilyLength());
419      }
420      // If midRow is null, just return 'right'. Can't do optimization.
421      if (midRow == null) return right;
422      // Return new Cell where we use right row and then a mid sort family.
423      return PrivateCellUtil.createFirstOnRowFamily(right, midRow, 0, midRow.length);
424    }
425    // Families are same. Compare on qualifiers.
426    diff = comparator.compareQualifiers(left, right);
427    if (diff > 0) {
428      throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left="
429          + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
430    }
431    if (diff < 0) {
432      if (bufferBacked) {
433        midRow = getMinimumMidpointArray(((ByteBufferExtendedCell) left).getQualifierByteBuffer(),
434            ((ByteBufferExtendedCell) left).getQualifierPosition(), left.getQualifierLength(),
435            ((ByteBufferExtendedCell) right).getQualifierByteBuffer(),
436            ((ByteBufferExtendedCell) right).getQualifierPosition(), right.getQualifierLength());
437      } else {
438        midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(),
439            left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(),
440            right.getQualifierLength());
441      }
442      // If midRow is null, just return 'right'. Can't do optimization.
443      if (midRow == null) return right;
444      // Return new Cell where we use right row and family and then a mid sort qualifier.
445      return PrivateCellUtil.createFirstOnRowCol(right, midRow, 0, midRow.length);
446    }
447    // No opportunity for optimization. Just return right key.
448    return right;
449  }
450
451  /**
452   * @param leftArray
453   * @param leftOffset
454   * @param leftLength
455   * @param rightArray
456   * @param rightOffset
457   * @param rightLength
458   * @return Return a new array that is between left and right and minimally
459   *         sized else just return null as indicator that we could not create a
460   *         mid point.
461   */
462  private static byte[] getMinimumMidpointArray(final byte[] leftArray, final int leftOffset,
463      final int leftLength, final byte[] rightArray, final int rightOffset, final int rightLength) {
464    // rows are different
465    int minLength = leftLength < rightLength ? leftLength : rightLength;
466    int diffIdx = 0;
467    while (diffIdx < minLength
468        && leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) {
469      diffIdx++;
470    }
471    byte[] minimumMidpointArray = null;
472    if (diffIdx >= minLength) {
473      // leftKey's row is prefix of rightKey's.
474      minimumMidpointArray = new byte[diffIdx + 1];
475      System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
476    } else {
477      int diffByte = leftArray[leftOffset + diffIdx];
478      if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) {
479        minimumMidpointArray = new byte[diffIdx + 1];
480        System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx);
481        minimumMidpointArray[diffIdx] = (byte) (diffByte + 1);
482      } else {
483        minimumMidpointArray = new byte[diffIdx + 1];
484        System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
485      }
486    }
487    return minimumMidpointArray;
488  }
489
490  private static byte[] getMinimumMidpointArray(ByteBuffer left, int leftOffset, int leftLength,
491      ByteBuffer right, int rightOffset, int rightLength) {
492    // rows are different
493    int minLength = leftLength < rightLength ? leftLength : rightLength;
494    int diffIdx = 0;
495    while (diffIdx < minLength && ByteBufferUtils.toByte(left,
496        leftOffset + diffIdx) == ByteBufferUtils.toByte(right, rightOffset + diffIdx)) {
497      diffIdx++;
498    }
499    byte[] minMidpoint = null;
500    if (diffIdx >= minLength) {
501      // leftKey's row is prefix of rightKey's.
502      minMidpoint = new byte[diffIdx + 1];
503      ByteBufferUtils.copyFromBufferToArray(minMidpoint, right, rightOffset, 0, diffIdx + 1);
504    } else {
505      int diffByte = ByteBufferUtils.toByte(left, leftOffset + diffIdx);
506      if ((0xff & diffByte) < 0xff
507          && (diffByte + 1) < (ByteBufferUtils.toByte(right, rightOffset + diffIdx) & 0xff)) {
508        minMidpoint = new byte[diffIdx + 1];
509        ByteBufferUtils.copyFromBufferToArray(minMidpoint, left, leftOffset, 0, diffIdx);
510        minMidpoint[diffIdx] = (byte) (diffByte + 1);
511      } else {
512        minMidpoint = new byte[diffIdx + 1];
513        ByteBufferUtils.copyFromBufferToArray(minMidpoint, right, rightOffset, 0, diffIdx + 1);
514      }
515    }
516    return minMidpoint;
517  }
518
519  /** Gives inline block writers an opportunity to contribute blocks. */
520  private void writeInlineBlocks(boolean closing) throws IOException {
521    for (InlineBlockWriter ibw : inlineBlockWriters) {
522      while (ibw.shouldWriteBlock(closing)) {
523        long offset = outputStream.getPos();
524        boolean cacheThisBlock = ibw.getCacheOnWrite();
525        ibw.writeInlineBlock(blockWriter.startWriting(
526            ibw.getInlineBlockType()));
527        blockWriter.writeHeaderAndData(outputStream);
528        ibw.blockWritten(offset, blockWriter.getOnDiskSizeWithHeader(),
529            blockWriter.getUncompressedSizeWithoutHeader());
530        totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
531
532        if (cacheThisBlock) {
533          doCacheOnWrite(offset);
534        }
535      }
536    }
537  }
538
539  /**
540   * Caches the last written HFile block.
541   * @param offset the offset of the block we want to cache. Used to determine
542   *          the cache key.
543   */
544  private void doCacheOnWrite(long offset) {
545    HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf);
546    cacheConf.getBlockCache().cacheBlock(
547        new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()),
548        cacheFormatBlock);
549  }
550
551  /**
552   * Ready a new block for writing.
553   *
554   * @throws IOException
555   */
556  protected void newBlock() throws IOException {
557    // This is where the next block begins.
558    blockWriter.startWriting(BlockType.DATA);
559    firstCellInBlock = null;
560    if (lastCell != null) {
561      lastCellOfPreviousBlock = lastCell;
562    }
563  }
564
565  /**
566   * Add a meta block to the end of the file. Call before close(). Metadata
567   * blocks are expensive. Fill one with a bunch of serialized data rather than
568   * do a metadata block per metadata instance. If metadata is small, consider
569   * adding to file info using {@link #appendFileInfo(byte[], byte[])}
570   *
571   * @param metaBlockName
572   *          name of the block
573   * @param content
574   *          will call readFields to get data later (DO NOT REUSE)
575   */
576  @Override
577  public void appendMetaBlock(String metaBlockName, Writable content) {
578    byte[] key = Bytes.toBytes(metaBlockName);
579    int i;
580    for (i = 0; i < metaNames.size(); ++i) {
581      // stop when the current key is greater than our own
582      byte[] cur = metaNames.get(i);
583      if (Bytes.BYTES_RAWCOMPARATOR.compare(cur, 0, cur.length, key, 0,
584          key.length) > 0) {
585        break;
586      }
587    }
588    metaNames.add(i, key);
589    metaData.add(i, content);
590  }
591
592  @Override
593  public void close() throws IOException {
594    if (outputStream == null) {
595      return;
596    }
597    // Save data block encoder metadata in the file info.
598    blockEncoder.saveMetadata(this);
599    // Write out the end of the data blocks, then write meta data blocks.
600    // followed by fileinfo, data block index and meta block index.
601
602    finishBlock();
603    writeInlineBlocks(true);
604
605    FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion());
606
607    // Write out the metadata blocks if any.
608    if (!metaNames.isEmpty()) {
609      for (int i = 0; i < metaNames.size(); ++i) {
610        // store the beginning offset
611        long offset = outputStream.getPos();
612        // write the metadata content
613        DataOutputStream dos = blockWriter.startWriting(BlockType.META);
614        metaData.get(i).write(dos);
615
616        blockWriter.writeHeaderAndData(outputStream);
617        totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
618
619        // Add the new meta block to the meta index.
620        metaBlockIndexWriter.addEntry(metaNames.get(i), offset,
621            blockWriter.getOnDiskSizeWithHeader());
622      }
623    }
624
625    // Load-on-open section.
626
627    // Data block index.
628    //
629    // In version 2, this section of the file starts with the root level data
630    // block index. We call a function that writes intermediate-level blocks
631    // first, then root level, and returns the offset of the root level block
632    // index.
633
634    long rootIndexOffset = dataBlockIndexWriter.writeIndexBlocks(outputStream);
635    trailer.setLoadOnOpenOffset(rootIndexOffset);
636
637    // Meta block index.
638    metaBlockIndexWriter.writeSingleLevelIndex(blockWriter.startWriting(
639        BlockType.ROOT_INDEX), "meta");
640    blockWriter.writeHeaderAndData(outputStream);
641    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
642
643    if (this.hFileContext.isIncludesMvcc()) {
644      appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
645      appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
646    }
647
648    // File info
649    writeFileInfo(trailer, blockWriter.startWriting(BlockType.FILE_INFO));
650    blockWriter.writeHeaderAndData(outputStream);
651    totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
652
653    // Load-on-open data supplied by higher levels, e.g. Bloom filters.
654    for (BlockWritable w : additionalLoadOnOpenData){
655      blockWriter.writeBlock(w, outputStream);
656      totalUncompressedBytes += blockWriter.getUncompressedSizeWithHeader();
657    }
658
659    // Now finish off the trailer.
660    trailer.setNumDataIndexLevels(dataBlockIndexWriter.getNumLevels());
661    trailer.setUncompressedDataIndexSize(
662        dataBlockIndexWriter.getTotalUncompressedSize());
663    trailer.setFirstDataBlockOffset(firstDataBlockOffset);
664    trailer.setLastDataBlockOffset(lastDataBlockOffset);
665    trailer.setComparatorClass(comparator.getClass());
666    trailer.setDataIndexCount(dataBlockIndexWriter.getNumRootEntries());
667
668
669    finishClose(trailer);
670
671    blockWriter.release();
672  }
673
674  @Override
675  public void addInlineBlockWriter(InlineBlockWriter ibw) {
676    inlineBlockWriters.add(ibw);
677  }
678
679  @Override
680  public void addGeneralBloomFilter(final BloomFilterWriter bfw) {
681    this.addBloomFilter(bfw, BlockType.GENERAL_BLOOM_META);
682  }
683
684  @Override
685  public void addDeleteFamilyBloomFilter(final BloomFilterWriter bfw) {
686    this.addBloomFilter(bfw, BlockType.DELETE_FAMILY_BLOOM_META);
687  }
688
689  private void addBloomFilter(final BloomFilterWriter bfw,
690      final BlockType blockType) {
691    if (bfw.getKeyCount() <= 0)
692      return;
693
694    if (blockType != BlockType.GENERAL_BLOOM_META &&
695        blockType != BlockType.DELETE_FAMILY_BLOOM_META) {
696      throw new RuntimeException("Block Type: " + blockType.toString() +
697          "is not supported");
698    }
699    additionalLoadOnOpenData.add(new BlockWritable() {
700      @Override
701      public BlockType getBlockType() {
702        return blockType;
703      }
704
705      @Override
706      public void writeToBlock(DataOutput out) throws IOException {
707        bfw.getMetaWriter().write(out);
708        Writable dataWriter = bfw.getDataWriter();
709        if (dataWriter != null)
710          dataWriter.write(out);
711      }
712    });
713  }
714
715  @Override
716  public HFileContext getFileContext() {
717    return hFileContext;
718  }
719
720  /**
721   * Add key/value to file. Keys must be added in an order that agrees with the
722   * Comparator passed on construction.
723   *
724   * @param cell
725   *          Cell to add. Cannot be empty nor null.
726   * @throws IOException
727   */
728  @Override
729  public void append(final Cell cell) throws IOException {
730    // checkKey uses comparator to check we are writing in order.
731    boolean dupKey = checkKey(cell);
732    if (!dupKey) {
733      checkBlockBoundary();
734    }
735
736    if (!blockWriter.isWriting()) {
737      newBlock();
738    }
739
740    blockWriter.write(cell);
741
742    totalKeyLength += PrivateCellUtil.estimatedSerializedSizeOfKey(cell);
743    totalValueLength += cell.getValueLength();
744
745    // Are we the first key in this block?
746    if (firstCellInBlock == null) {
747      // If cell is big, block will be closed and this firstCellInBlock reference will only last
748      // a short while.
749      firstCellInBlock = cell;
750    }
751
752    // TODO: What if cell is 10MB and we write infrequently? We hold on to cell here indefinitely?
753    lastCell = cell;
754    entryCount++;
755    this.maxMemstoreTS = Math.max(this.maxMemstoreTS, cell.getSequenceId());
756    int tagsLength = cell.getTagsLength();
757    if (tagsLength > this.maxTagsLength) {
758      this.maxTagsLength = tagsLength;
759    }
760  }
761
762  @Override
763  public void beforeShipped() throws IOException {
764    // Add clone methods for every cell
765    if (this.lastCell != null) {
766      this.lastCell = KeyValueUtil.toNewKeyCell(this.lastCell);
767    }
768    if (this.firstCellInBlock != null) {
769      this.firstCellInBlock = KeyValueUtil.toNewKeyCell(this.firstCellInBlock);
770    }
771    if (this.lastCellOfPreviousBlock != null) {
772      this.lastCellOfPreviousBlock = KeyValueUtil.toNewKeyCell(this.lastCellOfPreviousBlock);
773    }
774  }
775
776  protected void finishFileInfo() throws IOException {
777    if (lastCell != null) {
778      // Make a copy. The copy is stuffed into our fileinfo map. Needs a clean
779      // byte buffer. Won't take a tuple.
780      byte [] lastKey = PrivateCellUtil.getCellKeySerializedAsKeyValueKey(this.lastCell);
781      fileInfo.append(FileInfo.LASTKEY, lastKey, false);
782    }
783
784    // Average key length.
785    int avgKeyLen =
786        entryCount == 0 ? 0 : (int) (totalKeyLength / entryCount);
787    fileInfo.append(FileInfo.AVG_KEY_LEN, Bytes.toBytes(avgKeyLen), false);
788    fileInfo.append(FileInfo.CREATE_TIME_TS, Bytes.toBytes(hFileContext.getFileCreateTime()),
789      false);
790
791    // Average value length.
792    int avgValueLen =
793        entryCount == 0 ? 0 : (int) (totalValueLength / entryCount);
794    fileInfo.append(FileInfo.AVG_VALUE_LEN, Bytes.toBytes(avgValueLen), false);
795    if (hFileContext.isIncludesTags()) {
796      // When tags are not being written in this file, MAX_TAGS_LEN is excluded
797      // from the FileInfo
798      fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
799      boolean tagsCompressed = (hFileContext.getDataBlockEncoding() != DataBlockEncoding.NONE)
800        && hFileContext.isCompressTags();
801      fileInfo.append(FileInfo.TAGS_COMPRESSED, Bytes.toBytes(tagsCompressed), false);
802    }
803  }
804
805  protected int getMajorVersion() {
806    return 3;
807  }
808
809  protected int getMinorVersion() {
810    return HFileReaderImpl.MAX_MINOR_VERSION;
811  }
812
813  protected void finishClose(FixedFileTrailer trailer) throws IOException {
814    // Write out encryption metadata before finalizing if we have a valid crypto context
815    Encryption.Context cryptoContext = hFileContext.getEncryptionContext();
816    if (cryptoContext != Encryption.Context.NONE) {
817      // Wrap the context's key and write it as the encryption metadata, the wrapper includes
818      // all information needed for decryption
819      trailer.setEncryptionKey(EncryptionUtil.wrapKey(cryptoContext.getConf(),
820        cryptoContext.getConf().get(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY,
821          User.getCurrent().getShortName()),
822        cryptoContext.getKey()));
823    }
824    // Now we can finish the close
825    trailer.setMetaIndexCount(metaNames.size());
826    trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize());
827    trailer.setEntryCount(entryCount);
828    trailer.setCompressionCodec(hFileContext.getCompression());
829
830    long startTime = System.currentTimeMillis();
831    trailer.serialize(outputStream);
832    HFile.updateWriteLatency(System.currentTimeMillis() - startTime);
833
834    if (closeOutputStream) {
835      outputStream.close();
836      outputStream = null;
837    }
838  }
839}