001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.regionserver;
020
021import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
022import org.apache.hadoop.util.StringUtils;
023import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import java.util.concurrent.ThreadPoolExecutor;
028import java.util.concurrent.atomic.AtomicBoolean;
029
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.hbase.Cell;
032import org.apache.hadoop.hbase.CellComparator;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.MemoryCompactionPolicy;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038import org.apache.hadoop.hbase.util.Bytes;
039import org.apache.hadoop.hbase.util.ClassSize;
040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
041import org.apache.hadoop.hbase.wal.WAL;
042
043/**
044 * A memstore implementation which supports in-memory compaction.
045 * A compaction pipeline is added between the active set and the snapshot data structures;
046 * it consists of a list of segments that are subject to compaction.
047 * Like the snapshot, all pipeline segments are read-only; updates only affect the active set.
048 * To ensure this property we take advantage of the existing blocking mechanism -- the active set
049 * is pushed to the pipeline while holding the region's updatesLock in exclusive mode.
050 * Periodically, a compaction is applied in the background to all pipeline segments resulting
051 * in a single read-only component. The ``old'' segments are discarded when no scanner is reading
052 * them.
053 */
054@InterfaceAudience.Private
055public class CompactingMemStore extends AbstractMemStore {
056
057  // The external setting of the compacting MemStore behaviour
058  public static final String COMPACTING_MEMSTORE_TYPE_KEY =
059      "hbase.hregion.compacting.memstore.type";
060  public static final String COMPACTING_MEMSTORE_TYPE_DEFAULT =
061      String.valueOf(MemoryCompactionPolicy.NONE);
062  // Default fraction of in-memory-flush size w.r.t. flush-to-disk size
063  public static final String IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY =
064      "hbase.memstore.inmemoryflush.threshold.factor";
065  private static final double IN_MEMORY_FLUSH_THRESHOLD_FACTOR_DEFAULT = 0.014;
066
067  private static final Logger LOG = LoggerFactory.getLogger(CompactingMemStore.class);
068  private HStore store;
069  private CompactionPipeline pipeline;
070  protected MemStoreCompactor compactor;
071
072  private long inmemoryFlushSize;       // the threshold on active size for in-memory flush
073  private final AtomicBoolean inMemoryFlushInProgress = new AtomicBoolean(false);
074
075  // inWalReplay is true while we are synchronously replaying the edits from WAL
076  private boolean inWalReplay = false;
077
078  @VisibleForTesting
079  protected final AtomicBoolean allowCompaction = new AtomicBoolean(true);
080  private boolean compositeSnapshot = true;
081
082  /**
083   * Types of indexes (part of immutable segments) to be used after flattening,
084   * compaction, or merge are applied.
085   */
086  public enum IndexType {
087    CSLM_MAP,   // ConcurrentSkipLisMap
088    ARRAY_MAP,  // CellArrayMap
089    CHUNK_MAP   // CellChunkMap
090  }
091
092  private IndexType indexType = IndexType.ARRAY_MAP;  // default implementation
093
094  public static final long DEEP_OVERHEAD = ClassSize.align( AbstractMemStore.DEEP_OVERHEAD
095      + 6 * ClassSize.REFERENCE     // Store, CompactionPipeline,
096                                    // MemStoreCompactor, inMemoryFlushInProgress, allowCompaction,
097                                    // indexType
098      + Bytes.SIZEOF_LONG           // inmemoryFlushSize
099      + 2 * Bytes.SIZEOF_BOOLEAN    // compositeSnapshot and inWalReplay
100      + 2 * ClassSize.ATOMIC_BOOLEAN// inMemoryFlushInProgress and allowCompaction
101      + CompactionPipeline.DEEP_OVERHEAD + MemStoreCompactor.DEEP_OVERHEAD);
102
103  public CompactingMemStore(Configuration conf, CellComparator c,
104      HStore store, RegionServicesForStores regionServices,
105      MemoryCompactionPolicy compactionPolicy) throws IOException {
106    super(conf, c, regionServices);
107    this.store = store;
108    this.regionServices = regionServices;
109    this.pipeline = new CompactionPipeline(getRegionServices());
110    this.compactor = createMemStoreCompactor(compactionPolicy);
111    if (conf.getBoolean(MemStoreLAB.USEMSLAB_KEY, MemStoreLAB.USEMSLAB_DEFAULT)) {
112      // if user requested to work with MSLABs (whether on- or off-heap), then the
113      // immutable segments are going to use CellChunkMap as their index
114      indexType = IndexType.CHUNK_MAP;
115    } else {
116      indexType = IndexType.ARRAY_MAP;
117    }
118    // initialization of the flush size should happen after initialization of the index type
119    // so do not transfer the following method
120    initInmemoryFlushSize(conf);
121    LOG.info("Store={}, in-memory flush size threshold={}, immutable segments index type={}, " +
122            "compactor={}", this.store.getColumnFamilyName(),
123        StringUtils.byteDesc(this.inmemoryFlushSize), this.indexType,
124        (this.compactor == null? "NULL": this.compactor.toString()));
125  }
126
127  @VisibleForTesting
128  protected MemStoreCompactor createMemStoreCompactor(MemoryCompactionPolicy compactionPolicy)
129      throws IllegalArgumentIOException {
130    return new MemStoreCompactor(this, compactionPolicy);
131  }
132
133  private void initInmemoryFlushSize(Configuration conf) {
134    double factor = 0;
135    long memstoreFlushSize = getRegionServices().getMemStoreFlushSize();
136    int numStores = getRegionServices().getNumStores();
137    if (numStores <= 1) {
138      // Family number might also be zero in some of our unit test case
139      numStores = 1;
140    }
141    inmemoryFlushSize = memstoreFlushSize / numStores;
142    // multiply by a factor (the same factor for all index types)
143    factor = conf.getDouble(IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY,
144          IN_MEMORY_FLUSH_THRESHOLD_FACTOR_DEFAULT);
145
146    inmemoryFlushSize = (long) (inmemoryFlushSize * factor);
147  }
148
149  /**
150   * @return Total memory occupied by this MemStore. This won't include any size occupied by the
151   *         snapshot. We assume the snapshot will get cleared soon. This is not thread safe and
152   *         the memstore may be changed while computing its size. It is the responsibility of the
153   *         caller to make sure this doesn't happen.
154   */
155  @Override
156  public MemStoreSize size() {
157    MemStoreSizing memstoreSizing = new NonThreadSafeMemStoreSizing();
158    memstoreSizing.incMemStoreSize(active.getMemStoreSize());
159    for (Segment item : pipeline.getSegments()) {
160      memstoreSizing.incMemStoreSize(item.getMemStoreSize());
161    }
162    return memstoreSizing.getMemStoreSize();
163  }
164
165  /**
166   * This method is called before the flush is executed.
167   * @return an estimation (lower bound) of the unflushed sequence id in memstore after the flush
168   * is executed. if memstore will be cleared returns {@code HConstants.NO_SEQNUM}.
169   */
170  @Override
171  public long preFlushSeqIDEstimation() {
172    if(compositeSnapshot) {
173      return HConstants.NO_SEQNUM;
174    }
175    Segment segment = getLastSegment();
176    if(segment == null) {
177      return HConstants.NO_SEQNUM;
178    }
179    return segment.getMinSequenceId();
180  }
181
182  @Override
183  public boolean isSloppy() {
184    return true;
185  }
186
187  /**
188   * Push the current active memstore segment into the pipeline
189   * and create a snapshot of the tail of current compaction pipeline
190   * Snapshot must be cleared by call to {@link #clearSnapshot}.
191   * {@link #clearSnapshot(long)}.
192   * @return {@link MemStoreSnapshot}
193   */
194  @Override
195  public MemStoreSnapshot snapshot() {
196    // If snapshot currently has entries, then flusher failed or didn't call
197    // cleanup.  Log a warning.
198    if (!this.snapshot.isEmpty()) {
199      LOG.warn("Snapshot called again without clearing previous. " +
200          "Doing nothing. Another ongoing flush or did we fail last attempt?");
201    } else {
202      LOG.debug("FLUSHING TO DISK {}, store={}",
203            getRegionServices().getRegionInfo().getEncodedName(), getFamilyName());
204      stopCompaction();
205      pushActiveToPipeline(this.active);
206      snapshotId = EnvironmentEdgeManager.currentTime();
207      // in both cases whatever is pushed to snapshot is cleared from the pipeline
208      if (compositeSnapshot) {
209        pushPipelineToSnapshot();
210      } else {
211        pushTailToSnapshot();
212      }
213      compactor.resetStats();
214    }
215    return new MemStoreSnapshot(snapshotId, this.snapshot);
216  }
217
218  @Override
219  public MemStoreSize getFlushableSize() {
220    MemStoreSize mss = getSnapshotSize();
221    if (mss.getDataSize() == 0) {
222      // if snapshot is empty the tail of the pipeline (or everything in the memstore) is flushed
223      if (compositeSnapshot) {
224        MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing(pipeline.getPipelineSize());
225        memStoreSizing.incMemStoreSize(this.active.getMemStoreSize());
226        mss = memStoreSizing.getMemStoreSize();
227      } else {
228        mss = pipeline.getTailSize();
229      }
230    }
231    return mss.getDataSize() > 0? mss: this.active.getMemStoreSize();
232  }
233
234  @Override
235  protected long keySize() {
236    // Need to consider dataSize/keySize of all segments in pipeline and active
237    long keySize = this.active.getDataSize();
238    for (Segment segment : this.pipeline.getSegments()) {
239      keySize += segment.getDataSize();
240    }
241    return keySize;
242  }
243
244  @Override
245  protected long heapSize() {
246    // Need to consider heapOverhead of all segments in pipeline and active
247    long h = this.active.getHeapSize();
248    for (Segment segment : this.pipeline.getSegments()) {
249      h += segment.getHeapSize();
250    }
251    return h;
252  }
253
254  @Override
255  public void updateLowestUnflushedSequenceIdInWAL(boolean onlyIfGreater) {
256    long minSequenceId = pipeline.getMinSequenceId();
257    if(minSequenceId != Long.MAX_VALUE) {
258      byte[] encodedRegionName = getRegionServices().getRegionInfo().getEncodedNameAsBytes();
259      byte[] familyName = getFamilyNameInBytes();
260      WAL WAL = getRegionServices().getWAL();
261      if (WAL != null) {
262        WAL.updateStore(encodedRegionName, familyName, minSequenceId, onlyIfGreater);
263      }
264    }
265  }
266
267  /**
268   * This message intends to inform the MemStore that next coming updates
269   * are going to be part of the replaying edits from WAL
270   */
271  @Override
272  public void startReplayingFromWAL() {
273    inWalReplay = true;
274  }
275
276  /**
277   * This message intends to inform the MemStore that the replaying edits from WAL
278   * are done
279   */
280  @Override
281  public void stopReplayingFromWAL() {
282    inWalReplay = false;
283  }
284
285  // the getSegments() method is used for tests only
286  @VisibleForTesting
287  @Override
288  protected List<Segment> getSegments() {
289    List<? extends Segment> pipelineList = pipeline.getSegments();
290    List<Segment> list = new ArrayList<>(pipelineList.size() + 2);
291    list.add(this.active);
292    list.addAll(pipelineList);
293    list.addAll(this.snapshot.getAllSegments());
294
295    return list;
296  }
297
298  // the following three methods allow to manipulate the settings of composite snapshot
299  public void setCompositeSnapshot(boolean useCompositeSnapshot) {
300    this.compositeSnapshot = useCompositeSnapshot;
301  }
302
303  public boolean swapCompactedSegments(VersionedSegmentsList versionedList, ImmutableSegment result,
304      boolean merge) {
305    // last true stands for updating the region size
306    return pipeline.swap(versionedList, result, !merge, true);
307  }
308
309  /**
310   * @param requesterVersion The caller must hold the VersionedList of the pipeline
311   *           with version taken earlier. This version must be passed as a parameter here.
312   *           The flattening happens only if versions match.
313   */
314  public void flattenOneSegment(long requesterVersion,  MemStoreCompactionStrategy.Action action) {
315    pipeline.flattenOneSegment(requesterVersion, indexType, action);
316  }
317
318  // setter is used only for testability
319  @VisibleForTesting
320  void setIndexType(IndexType type) {
321    indexType = type;
322    // Because this functionality is for testing only and tests are setting in-memory flush size
323    // according to their need, there is no setting of in-memory flush size, here.
324    // If it is needed, please change in-memory flush size explicitly
325  }
326
327  public IndexType getIndexType() {
328    return indexType;
329  }
330
331  public boolean hasImmutableSegments() {
332    return !pipeline.isEmpty();
333  }
334
335  public VersionedSegmentsList getImmutableSegments() {
336    return pipeline.getVersionedList();
337  }
338
339  public long getSmallestReadPoint() {
340    return store.getSmallestReadPoint();
341  }
342
343  public HStore getStore() {
344    return store;
345  }
346
347  public String getFamilyName() {
348    return Bytes.toString(getFamilyNameInBytes());
349  }
350
351  @Override
352  public List<KeyValueScanner> getScanners(long readPt) throws IOException {
353    MutableSegment activeTmp = active;
354    List<? extends Segment> pipelineList = pipeline.getSegments();
355    List<? extends Segment> snapshotList = snapshot.getAllSegments();
356    long numberOfSegments = 1L + pipelineList.size() + snapshotList.size();
357    // The list of elements in pipeline + the active element + the snapshot segment
358    List<KeyValueScanner> list = createList((int) numberOfSegments);
359    addToScanners(activeTmp, readPt, list);
360    addToScanners(pipelineList, readPt, list);
361    addToScanners(snapshotList, readPt, list);
362    return list;
363  }
364
365   @VisibleForTesting
366   protected List<KeyValueScanner> createList(int capacity) {
367     return new ArrayList<>(capacity);
368   }
369
370  /**
371   * Check whether anything need to be done based on the current active set size.
372   * The method is invoked upon every addition to the active set.
373   * For CompactingMemStore, flush the active set to the read-only memory if it's
374   * size is above threshold
375   */
376  @Override
377  protected void checkActiveSize() {
378    if (shouldFlushInMemory()) {
379      /* The thread is dispatched to flush-in-memory. This cannot be done
380      * on the same thread, because for flush-in-memory we require updatesLock
381      * in exclusive mode while this method (checkActiveSize) is invoked holding updatesLock
382      * in the shared mode. */
383      InMemoryFlushRunnable runnable = new InMemoryFlushRunnable();
384      if (LOG.isTraceEnabled()) {
385        LOG.trace(
386          "Dispatching the MemStore in-memory flush for store " + store.getColumnFamilyName());
387      }
388      getPool().execute(runnable);
389    }
390  }
391
392  // internally used method, externally visible only for tests
393  // when invoked directly from tests it must be verified that the caller doesn't hold updatesLock,
394  // otherwise there is a deadlock
395  @VisibleForTesting
396  void flushInMemory() throws IOException {
397    // setting the inMemoryFlushInProgress flag again for the case this method is invoked
398    // directly (only in tests) in the common path setting from true to true is idempotent
399    inMemoryFlushInProgress.set(true);
400    try {
401      // Phase I: Update the pipeline
402      getRegionServices().blockUpdates();
403      try {
404        LOG.trace("IN-MEMORY FLUSH: Pushing active segment into compaction pipeline");
405        pushActiveToPipeline(this.active);
406      } finally {
407        getRegionServices().unblockUpdates();
408      }
409
410      // Used by tests
411      if (!allowCompaction.get()) {
412        return;
413      }
414      // Phase II: Compact the pipeline
415      try {
416        // Speculative compaction execution, may be interrupted if flush is forced while
417        // compaction is in progress
418        compactor.start();
419      } catch (IOException e) {
420        LOG.warn("Unable to run in-memory compaction on {}/{}; exception={}",
421            getRegionServices().getRegionInfo().getEncodedName(), getFamilyName(), e);
422      }
423    } finally {
424      inMemoryFlushInProgress.set(false);
425      LOG.trace("IN-MEMORY FLUSH: end");
426    }
427  }
428
429  private Segment getLastSegment() {
430    Segment localActive = getActive();
431    Segment tail = pipeline.getTail();
432    return tail == null ? localActive : tail;
433  }
434
435  private byte[] getFamilyNameInBytes() {
436    return store.getColumnFamilyDescriptor().getName();
437  }
438
439  private ThreadPoolExecutor getPool() {
440    return getRegionServices().getInMemoryCompactionPool();
441  }
442
443  @VisibleForTesting
444  protected boolean shouldFlushInMemory() {
445    if (this.active.getDataSize() > inmemoryFlushSize) { // size above flush threshold
446      if (inWalReplay) {  // when replaying edits from WAL there is no need in in-memory flush
447        return false;     // regardless the size
448      }
449      // the inMemoryFlushInProgress is CASed to be true here in order to mutual exclude
450      // the insert of the active into the compaction pipeline
451      return (inMemoryFlushInProgress.compareAndSet(false,true));
452    }
453    return false;
454  }
455
456  /**
457   * The request to cancel the compaction asynchronous task (caused by in-memory flush)
458   * The compaction may still happen if the request was sent too late
459   * Non-blocking request
460   */
461  private void stopCompaction() {
462    if (inMemoryFlushInProgress.get()) {
463      compactor.stop();
464    }
465  }
466
467  protected void pushActiveToPipeline(MutableSegment active) {
468    if (!active.isEmpty()) {
469      pipeline.pushHead(active);
470      resetActive();
471    }
472  }
473
474  private void pushTailToSnapshot() {
475    VersionedSegmentsList segments = pipeline.getVersionedTail();
476    pushToSnapshot(segments.getStoreSegments());
477    // In Swap: don't close segments (they are in snapshot now) and don't update the region size
478    pipeline.swap(segments,null,false, false);
479  }
480
481  private void pushPipelineToSnapshot() {
482    int iterationsCnt = 0;
483    boolean done = false;
484    while (!done) {
485      iterationsCnt++;
486      VersionedSegmentsList segments = pipeline.getVersionedList();
487      pushToSnapshot(segments.getStoreSegments());
488      // swap can return false in case the pipeline was updated by ongoing compaction
489      // and the version increase, the chance of it happenning is very low
490      // In Swap: don't close segments (they are in snapshot now) and don't update the region size
491      done = pipeline.swap(segments, null, false, false);
492      if (iterationsCnt>2) {
493        // practically it is impossible that this loop iterates more than two times
494        // (because the compaction is stopped and none restarts it while in snapshot request),
495        // however stopping here for the case of the infinite loop causing by any error
496        LOG.warn("Multiple unsuccessful attempts to push the compaction pipeline to snapshot," +
497            " while flushing to disk.");
498        this.snapshot = SegmentFactory.instance().createImmutableSegment(getComparator());
499        break;
500      }
501    }
502  }
503
504  private void pushToSnapshot(List<ImmutableSegment> segments) {
505    if(segments.isEmpty()) return;
506    if(segments.size() == 1 && !segments.get(0).isEmpty()) {
507      this.snapshot = segments.get(0);
508      return;
509    } else { // create composite snapshot
510      this.snapshot =
511          SegmentFactory.instance().createCompositeImmutableSegment(getComparator(), segments);
512    }
513  }
514
515  private RegionServicesForStores getRegionServices() {
516    return regionServices;
517  }
518
519  /**
520  * The in-memory-flusher thread performs the flush asynchronously.
521  * There is at most one thread per memstore instance.
522  * It takes the updatesLock exclusively, pushes active into the pipeline, releases updatesLock
523  * and compacts the pipeline.
524  */
525  private class InMemoryFlushRunnable implements Runnable {
526
527    @Override
528    public void run() {
529      try {
530        flushInMemory();
531      } catch (IOException e) {
532        LOG.warn("Unable to run memstore compaction. region "
533            + getRegionServices().getRegionInfo().getRegionNameAsString()
534            + "store: "+ getFamilyName(), e);
535      }
536    }
537  }
538
539  @VisibleForTesting
540  boolean isMemStoreFlushingInMemory() {
541    return inMemoryFlushInProgress.get();
542  }
543
544  /**
545   * @param cell Find the row that comes after this one.  If null, we return the
546   *             first.
547   * @return Next row or null if none found.
548   */
549  Cell getNextRow(final Cell cell) {
550    Cell lowest = null;
551    List<Segment> segments = getSegments();
552    for (Segment segment : segments) {
553      if (lowest == null) {
554        lowest = getNextRow(cell, segment.getCellSet());
555      } else {
556        lowest = getLowest(lowest, getNextRow(cell, segment.getCellSet()));
557      }
558    }
559    return lowest;
560  }
561
562  @VisibleForTesting
563  long getInmemoryFlushSize() {
564    return inmemoryFlushSize;
565  }
566
567  // debug method
568  public void debug() {
569    String msg = "active size=" + this.active.getDataSize();
570    msg += " in-memory flush size is "+ inmemoryFlushSize;
571    msg += " allow compaction is "+ (allowCompaction.get() ? "true" : "false");
572    msg += " inMemoryFlushInProgress is "+ (inMemoryFlushInProgress.get() ? "true" : "false");
573    LOG.debug(msg);
574  }
575
576}