001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.regionserver; 020 021import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException; 022import org.apache.hadoop.util.StringUtils; 023import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.List; 027import java.util.concurrent.ThreadPoolExecutor; 028import java.util.concurrent.atomic.AtomicBoolean; 029 030import org.apache.hadoop.conf.Configuration; 031import org.apache.hadoop.hbase.Cell; 032import org.apache.hadoop.hbase.CellComparator; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.MemoryCompactionPolicy; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038import org.apache.hadoop.hbase.util.Bytes; 039import org.apache.hadoop.hbase.util.ClassSize; 040import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 041import org.apache.hadoop.hbase.wal.WAL; 042 043/** 044 * A memstore implementation which supports in-memory compaction. 045 * A compaction pipeline is added between the active set and the snapshot data structures; 046 * it consists of a list of segments that are subject to compaction. 047 * Like the snapshot, all pipeline segments are read-only; updates only affect the active set. 048 * To ensure this property we take advantage of the existing blocking mechanism -- the active set 049 * is pushed to the pipeline while holding the region's updatesLock in exclusive mode. 050 * Periodically, a compaction is applied in the background to all pipeline segments resulting 051 * in a single read-only component. The ``old'' segments are discarded when no scanner is reading 052 * them. 053 */ 054@InterfaceAudience.Private 055public class CompactingMemStore extends AbstractMemStore { 056 057 // The external setting of the compacting MemStore behaviour 058 public static final String COMPACTING_MEMSTORE_TYPE_KEY = 059 "hbase.hregion.compacting.memstore.type"; 060 public static final String COMPACTING_MEMSTORE_TYPE_DEFAULT = 061 String.valueOf(MemoryCompactionPolicy.NONE); 062 // Default fraction of in-memory-flush size w.r.t. flush-to-disk size 063 public static final String IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY = 064 "hbase.memstore.inmemoryflush.threshold.factor"; 065 private static final double IN_MEMORY_FLUSH_THRESHOLD_FACTOR_DEFAULT = 0.014; 066 067 private static final Logger LOG = LoggerFactory.getLogger(CompactingMemStore.class); 068 private HStore store; 069 private CompactionPipeline pipeline; 070 protected MemStoreCompactor compactor; 071 072 private long inmemoryFlushSize; // the threshold on active size for in-memory flush 073 private final AtomicBoolean inMemoryFlushInProgress = new AtomicBoolean(false); 074 075 // inWalReplay is true while we are synchronously replaying the edits from WAL 076 private boolean inWalReplay = false; 077 078 @VisibleForTesting 079 protected final AtomicBoolean allowCompaction = new AtomicBoolean(true); 080 private boolean compositeSnapshot = true; 081 082 /** 083 * Types of indexes (part of immutable segments) to be used after flattening, 084 * compaction, or merge are applied. 085 */ 086 public enum IndexType { 087 CSLM_MAP, // ConcurrentSkipLisMap 088 ARRAY_MAP, // CellArrayMap 089 CHUNK_MAP // CellChunkMap 090 } 091 092 private IndexType indexType = IndexType.ARRAY_MAP; // default implementation 093 094 public static final long DEEP_OVERHEAD = ClassSize.align( AbstractMemStore.DEEP_OVERHEAD 095 + 6 * ClassSize.REFERENCE // Store, CompactionPipeline, 096 // MemStoreCompactor, inMemoryFlushInProgress, allowCompaction, 097 // indexType 098 + Bytes.SIZEOF_LONG // inmemoryFlushSize 099 + 2 * Bytes.SIZEOF_BOOLEAN // compositeSnapshot and inWalReplay 100 + 2 * ClassSize.ATOMIC_BOOLEAN// inMemoryFlushInProgress and allowCompaction 101 + CompactionPipeline.DEEP_OVERHEAD + MemStoreCompactor.DEEP_OVERHEAD); 102 103 public CompactingMemStore(Configuration conf, CellComparator c, 104 HStore store, RegionServicesForStores regionServices, 105 MemoryCompactionPolicy compactionPolicy) throws IOException { 106 super(conf, c, regionServices); 107 this.store = store; 108 this.regionServices = regionServices; 109 this.pipeline = new CompactionPipeline(getRegionServices()); 110 this.compactor = createMemStoreCompactor(compactionPolicy); 111 if (conf.getBoolean(MemStoreLAB.USEMSLAB_KEY, MemStoreLAB.USEMSLAB_DEFAULT)) { 112 // if user requested to work with MSLABs (whether on- or off-heap), then the 113 // immutable segments are going to use CellChunkMap as their index 114 indexType = IndexType.CHUNK_MAP; 115 } else { 116 indexType = IndexType.ARRAY_MAP; 117 } 118 // initialization of the flush size should happen after initialization of the index type 119 // so do not transfer the following method 120 initInmemoryFlushSize(conf); 121 LOG.info("Store={}, in-memory flush size threshold={}, immutable segments index type={}, " + 122 "compactor={}", this.store.getColumnFamilyName(), 123 StringUtils.byteDesc(this.inmemoryFlushSize), this.indexType, 124 (this.compactor == null? "NULL": this.compactor.toString())); 125 } 126 127 @VisibleForTesting 128 protected MemStoreCompactor createMemStoreCompactor(MemoryCompactionPolicy compactionPolicy) 129 throws IllegalArgumentIOException { 130 return new MemStoreCompactor(this, compactionPolicy); 131 } 132 133 private void initInmemoryFlushSize(Configuration conf) { 134 double factor = 0; 135 long memstoreFlushSize = getRegionServices().getMemStoreFlushSize(); 136 int numStores = getRegionServices().getNumStores(); 137 if (numStores <= 1) { 138 // Family number might also be zero in some of our unit test case 139 numStores = 1; 140 } 141 inmemoryFlushSize = memstoreFlushSize / numStores; 142 // multiply by a factor (the same factor for all index types) 143 factor = conf.getDouble(IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 144 IN_MEMORY_FLUSH_THRESHOLD_FACTOR_DEFAULT); 145 146 inmemoryFlushSize = (long) (inmemoryFlushSize * factor); 147 } 148 149 /** 150 * @return Total memory occupied by this MemStore. This won't include any size occupied by the 151 * snapshot. We assume the snapshot will get cleared soon. This is not thread safe and 152 * the memstore may be changed while computing its size. It is the responsibility of the 153 * caller to make sure this doesn't happen. 154 */ 155 @Override 156 public MemStoreSize size() { 157 MemStoreSizing memstoreSizing = new NonThreadSafeMemStoreSizing(); 158 memstoreSizing.incMemStoreSize(active.getMemStoreSize()); 159 for (Segment item : pipeline.getSegments()) { 160 memstoreSizing.incMemStoreSize(item.getMemStoreSize()); 161 } 162 return memstoreSizing.getMemStoreSize(); 163 } 164 165 /** 166 * This method is called before the flush is executed. 167 * @return an estimation (lower bound) of the unflushed sequence id in memstore after the flush 168 * is executed. if memstore will be cleared returns {@code HConstants.NO_SEQNUM}. 169 */ 170 @Override 171 public long preFlushSeqIDEstimation() { 172 if(compositeSnapshot) { 173 return HConstants.NO_SEQNUM; 174 } 175 Segment segment = getLastSegment(); 176 if(segment == null) { 177 return HConstants.NO_SEQNUM; 178 } 179 return segment.getMinSequenceId(); 180 } 181 182 @Override 183 public boolean isSloppy() { 184 return true; 185 } 186 187 /** 188 * Push the current active memstore segment into the pipeline 189 * and create a snapshot of the tail of current compaction pipeline 190 * Snapshot must be cleared by call to {@link #clearSnapshot}. 191 * {@link #clearSnapshot(long)}. 192 * @return {@link MemStoreSnapshot} 193 */ 194 @Override 195 public MemStoreSnapshot snapshot() { 196 // If snapshot currently has entries, then flusher failed or didn't call 197 // cleanup. Log a warning. 198 if (!this.snapshot.isEmpty()) { 199 LOG.warn("Snapshot called again without clearing previous. " + 200 "Doing nothing. Another ongoing flush or did we fail last attempt?"); 201 } else { 202 LOG.debug("FLUSHING TO DISK {}, store={}", 203 getRegionServices().getRegionInfo().getEncodedName(), getFamilyName()); 204 stopCompaction(); 205 pushActiveToPipeline(this.active); 206 snapshotId = EnvironmentEdgeManager.currentTime(); 207 // in both cases whatever is pushed to snapshot is cleared from the pipeline 208 if (compositeSnapshot) { 209 pushPipelineToSnapshot(); 210 } else { 211 pushTailToSnapshot(); 212 } 213 compactor.resetStats(); 214 } 215 return new MemStoreSnapshot(snapshotId, this.snapshot); 216 } 217 218 @Override 219 public MemStoreSize getFlushableSize() { 220 MemStoreSize mss = getSnapshotSize(); 221 if (mss.getDataSize() == 0) { 222 // if snapshot is empty the tail of the pipeline (or everything in the memstore) is flushed 223 if (compositeSnapshot) { 224 MemStoreSizing memStoreSizing = new NonThreadSafeMemStoreSizing(pipeline.getPipelineSize()); 225 memStoreSizing.incMemStoreSize(this.active.getMemStoreSize()); 226 mss = memStoreSizing.getMemStoreSize(); 227 } else { 228 mss = pipeline.getTailSize(); 229 } 230 } 231 return mss.getDataSize() > 0? mss: this.active.getMemStoreSize(); 232 } 233 234 @Override 235 protected long keySize() { 236 // Need to consider dataSize/keySize of all segments in pipeline and active 237 long keySize = this.active.getDataSize(); 238 for (Segment segment : this.pipeline.getSegments()) { 239 keySize += segment.getDataSize(); 240 } 241 return keySize; 242 } 243 244 @Override 245 protected long heapSize() { 246 // Need to consider heapOverhead of all segments in pipeline and active 247 long h = this.active.getHeapSize(); 248 for (Segment segment : this.pipeline.getSegments()) { 249 h += segment.getHeapSize(); 250 } 251 return h; 252 } 253 254 @Override 255 public void updateLowestUnflushedSequenceIdInWAL(boolean onlyIfGreater) { 256 long minSequenceId = pipeline.getMinSequenceId(); 257 if(minSequenceId != Long.MAX_VALUE) { 258 byte[] encodedRegionName = getRegionServices().getRegionInfo().getEncodedNameAsBytes(); 259 byte[] familyName = getFamilyNameInBytes(); 260 WAL WAL = getRegionServices().getWAL(); 261 if (WAL != null) { 262 WAL.updateStore(encodedRegionName, familyName, minSequenceId, onlyIfGreater); 263 } 264 } 265 } 266 267 /** 268 * This message intends to inform the MemStore that next coming updates 269 * are going to be part of the replaying edits from WAL 270 */ 271 @Override 272 public void startReplayingFromWAL() { 273 inWalReplay = true; 274 } 275 276 /** 277 * This message intends to inform the MemStore that the replaying edits from WAL 278 * are done 279 */ 280 @Override 281 public void stopReplayingFromWAL() { 282 inWalReplay = false; 283 } 284 285 // the getSegments() method is used for tests only 286 @VisibleForTesting 287 @Override 288 protected List<Segment> getSegments() { 289 List<? extends Segment> pipelineList = pipeline.getSegments(); 290 List<Segment> list = new ArrayList<>(pipelineList.size() + 2); 291 list.add(this.active); 292 list.addAll(pipelineList); 293 list.addAll(this.snapshot.getAllSegments()); 294 295 return list; 296 } 297 298 // the following three methods allow to manipulate the settings of composite snapshot 299 public void setCompositeSnapshot(boolean useCompositeSnapshot) { 300 this.compositeSnapshot = useCompositeSnapshot; 301 } 302 303 public boolean swapCompactedSegments(VersionedSegmentsList versionedList, ImmutableSegment result, 304 boolean merge) { 305 // last true stands for updating the region size 306 return pipeline.swap(versionedList, result, !merge, true); 307 } 308 309 /** 310 * @param requesterVersion The caller must hold the VersionedList of the pipeline 311 * with version taken earlier. This version must be passed as a parameter here. 312 * The flattening happens only if versions match. 313 */ 314 public void flattenOneSegment(long requesterVersion, MemStoreCompactionStrategy.Action action) { 315 pipeline.flattenOneSegment(requesterVersion, indexType, action); 316 } 317 318 // setter is used only for testability 319 @VisibleForTesting 320 void setIndexType(IndexType type) { 321 indexType = type; 322 // Because this functionality is for testing only and tests are setting in-memory flush size 323 // according to their need, there is no setting of in-memory flush size, here. 324 // If it is needed, please change in-memory flush size explicitly 325 } 326 327 public IndexType getIndexType() { 328 return indexType; 329 } 330 331 public boolean hasImmutableSegments() { 332 return !pipeline.isEmpty(); 333 } 334 335 public VersionedSegmentsList getImmutableSegments() { 336 return pipeline.getVersionedList(); 337 } 338 339 public long getSmallestReadPoint() { 340 return store.getSmallestReadPoint(); 341 } 342 343 public HStore getStore() { 344 return store; 345 } 346 347 public String getFamilyName() { 348 return Bytes.toString(getFamilyNameInBytes()); 349 } 350 351 @Override 352 public List<KeyValueScanner> getScanners(long readPt) throws IOException { 353 MutableSegment activeTmp = active; 354 List<? extends Segment> pipelineList = pipeline.getSegments(); 355 List<? extends Segment> snapshotList = snapshot.getAllSegments(); 356 long numberOfSegments = 1L + pipelineList.size() + snapshotList.size(); 357 // The list of elements in pipeline + the active element + the snapshot segment 358 List<KeyValueScanner> list = createList((int) numberOfSegments); 359 addToScanners(activeTmp, readPt, list); 360 addToScanners(pipelineList, readPt, list); 361 addToScanners(snapshotList, readPt, list); 362 return list; 363 } 364 365 @VisibleForTesting 366 protected List<KeyValueScanner> createList(int capacity) { 367 return new ArrayList<>(capacity); 368 } 369 370 /** 371 * Check whether anything need to be done based on the current active set size. 372 * The method is invoked upon every addition to the active set. 373 * For CompactingMemStore, flush the active set to the read-only memory if it's 374 * size is above threshold 375 */ 376 @Override 377 protected void checkActiveSize() { 378 if (shouldFlushInMemory()) { 379 /* The thread is dispatched to flush-in-memory. This cannot be done 380 * on the same thread, because for flush-in-memory we require updatesLock 381 * in exclusive mode while this method (checkActiveSize) is invoked holding updatesLock 382 * in the shared mode. */ 383 InMemoryFlushRunnable runnable = new InMemoryFlushRunnable(); 384 if (LOG.isTraceEnabled()) { 385 LOG.trace( 386 "Dispatching the MemStore in-memory flush for store " + store.getColumnFamilyName()); 387 } 388 getPool().execute(runnable); 389 } 390 } 391 392 // internally used method, externally visible only for tests 393 // when invoked directly from tests it must be verified that the caller doesn't hold updatesLock, 394 // otherwise there is a deadlock 395 @VisibleForTesting 396 void flushInMemory() throws IOException { 397 // setting the inMemoryFlushInProgress flag again for the case this method is invoked 398 // directly (only in tests) in the common path setting from true to true is idempotent 399 inMemoryFlushInProgress.set(true); 400 try { 401 // Phase I: Update the pipeline 402 getRegionServices().blockUpdates(); 403 try { 404 LOG.trace("IN-MEMORY FLUSH: Pushing active segment into compaction pipeline"); 405 pushActiveToPipeline(this.active); 406 } finally { 407 getRegionServices().unblockUpdates(); 408 } 409 410 // Used by tests 411 if (!allowCompaction.get()) { 412 return; 413 } 414 // Phase II: Compact the pipeline 415 try { 416 // Speculative compaction execution, may be interrupted if flush is forced while 417 // compaction is in progress 418 compactor.start(); 419 } catch (IOException e) { 420 LOG.warn("Unable to run in-memory compaction on {}/{}; exception={}", 421 getRegionServices().getRegionInfo().getEncodedName(), getFamilyName(), e); 422 } 423 } finally { 424 inMemoryFlushInProgress.set(false); 425 LOG.trace("IN-MEMORY FLUSH: end"); 426 } 427 } 428 429 private Segment getLastSegment() { 430 Segment localActive = getActive(); 431 Segment tail = pipeline.getTail(); 432 return tail == null ? localActive : tail; 433 } 434 435 private byte[] getFamilyNameInBytes() { 436 return store.getColumnFamilyDescriptor().getName(); 437 } 438 439 private ThreadPoolExecutor getPool() { 440 return getRegionServices().getInMemoryCompactionPool(); 441 } 442 443 @VisibleForTesting 444 protected boolean shouldFlushInMemory() { 445 if (this.active.getDataSize() > inmemoryFlushSize) { // size above flush threshold 446 if (inWalReplay) { // when replaying edits from WAL there is no need in in-memory flush 447 return false; // regardless the size 448 } 449 // the inMemoryFlushInProgress is CASed to be true here in order to mutual exclude 450 // the insert of the active into the compaction pipeline 451 return (inMemoryFlushInProgress.compareAndSet(false,true)); 452 } 453 return false; 454 } 455 456 /** 457 * The request to cancel the compaction asynchronous task (caused by in-memory flush) 458 * The compaction may still happen if the request was sent too late 459 * Non-blocking request 460 */ 461 private void stopCompaction() { 462 if (inMemoryFlushInProgress.get()) { 463 compactor.stop(); 464 } 465 } 466 467 protected void pushActiveToPipeline(MutableSegment active) { 468 if (!active.isEmpty()) { 469 pipeline.pushHead(active); 470 resetActive(); 471 } 472 } 473 474 private void pushTailToSnapshot() { 475 VersionedSegmentsList segments = pipeline.getVersionedTail(); 476 pushToSnapshot(segments.getStoreSegments()); 477 // In Swap: don't close segments (they are in snapshot now) and don't update the region size 478 pipeline.swap(segments,null,false, false); 479 } 480 481 private void pushPipelineToSnapshot() { 482 int iterationsCnt = 0; 483 boolean done = false; 484 while (!done) { 485 iterationsCnt++; 486 VersionedSegmentsList segments = pipeline.getVersionedList(); 487 pushToSnapshot(segments.getStoreSegments()); 488 // swap can return false in case the pipeline was updated by ongoing compaction 489 // and the version increase, the chance of it happenning is very low 490 // In Swap: don't close segments (they are in snapshot now) and don't update the region size 491 done = pipeline.swap(segments, null, false, false); 492 if (iterationsCnt>2) { 493 // practically it is impossible that this loop iterates more than two times 494 // (because the compaction is stopped and none restarts it while in snapshot request), 495 // however stopping here for the case of the infinite loop causing by any error 496 LOG.warn("Multiple unsuccessful attempts to push the compaction pipeline to snapshot," + 497 " while flushing to disk."); 498 this.snapshot = SegmentFactory.instance().createImmutableSegment(getComparator()); 499 break; 500 } 501 } 502 } 503 504 private void pushToSnapshot(List<ImmutableSegment> segments) { 505 if(segments.isEmpty()) return; 506 if(segments.size() == 1 && !segments.get(0).isEmpty()) { 507 this.snapshot = segments.get(0); 508 return; 509 } else { // create composite snapshot 510 this.snapshot = 511 SegmentFactory.instance().createCompositeImmutableSegment(getComparator(), segments); 512 } 513 } 514 515 private RegionServicesForStores getRegionServices() { 516 return regionServices; 517 } 518 519 /** 520 * The in-memory-flusher thread performs the flush asynchronously. 521 * There is at most one thread per memstore instance. 522 * It takes the updatesLock exclusively, pushes active into the pipeline, releases updatesLock 523 * and compacts the pipeline. 524 */ 525 private class InMemoryFlushRunnable implements Runnable { 526 527 @Override 528 public void run() { 529 try { 530 flushInMemory(); 531 } catch (IOException e) { 532 LOG.warn("Unable to run memstore compaction. region " 533 + getRegionServices().getRegionInfo().getRegionNameAsString() 534 + "store: "+ getFamilyName(), e); 535 } 536 } 537 } 538 539 @VisibleForTesting 540 boolean isMemStoreFlushingInMemory() { 541 return inMemoryFlushInProgress.get(); 542 } 543 544 /** 545 * @param cell Find the row that comes after this one. If null, we return the 546 * first. 547 * @return Next row or null if none found. 548 */ 549 Cell getNextRow(final Cell cell) { 550 Cell lowest = null; 551 List<Segment> segments = getSegments(); 552 for (Segment segment : segments) { 553 if (lowest == null) { 554 lowest = getNextRow(cell, segment.getCellSet()); 555 } else { 556 lowest = getLowest(lowest, getNextRow(cell, segment.getCellSet())); 557 } 558 } 559 return lowest; 560 } 561 562 @VisibleForTesting 563 long getInmemoryFlushSize() { 564 return inmemoryFlushSize; 565 } 566 567 // debug method 568 public void debug() { 569 String msg = "active size=" + this.active.getDataSize(); 570 msg += " in-memory flush size is "+ inmemoryFlushSize; 571 msg += " allow compaction is "+ (allowCompaction.get() ? "true" : "false"); 572 msg += " inMemoryFlushInProgress is "+ (inMemoryFlushInProgress.get() ? "true" : "false"); 573 LOG.debug(msg); 574 } 575 576}