001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.Arrays;
026import org.apache.hadoop.conf.Configuration;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.*;
029import org.apache.hadoop.hbase.HBaseClassTestRule;
030import org.apache.hadoop.hbase.client.Get;
031import org.apache.hadoop.hbase.client.Put;
032import org.apache.hadoop.hbase.client.Result;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.testclassification.LargeTests;
035import org.apache.hadoop.hbase.testclassification.RegionServerTests;
036import org.apache.hadoop.hbase.util.Bytes;
037import org.apache.hadoop.hbase.util.Threads;
038import org.apache.hadoop.hbase.wal.WAL;
039import org.junit.Before;
040import org.junit.ClassRule;
041import org.junit.Test;
042import org.junit.experimental.categories.Category;
043
044/**
045 * This test verifies the correctness of the Per Column Family flushing strategy
046 * when part of the memstores are compacted memstores
047 */
048@Category({ RegionServerTests.class, LargeTests.class })
049public class TestWalAndCompactingMemStoreFlush {
050
051  @ClassRule
052  public static final HBaseClassTestRule CLASS_RULE =
053      HBaseClassTestRule.forClass(TestWalAndCompactingMemStoreFlush.class);
054
055  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
056  private static final Path DIR = TEST_UTIL.getDataTestDir("TestHRegion");
057  public static final TableName TABLENAME = TableName.valueOf("TestWalAndCompactingMemStoreFlush",
058      "t1");
059
060  public static final byte[][] FAMILIES = { Bytes.toBytes("f1"), Bytes.toBytes("f2"),
061      Bytes.toBytes("f3"), Bytes.toBytes("f4"), Bytes.toBytes("f5") };
062
063  public static final byte[] FAMILY1 = FAMILIES[0];
064  public static final byte[] FAMILY2 = FAMILIES[1];
065  public static final byte[] FAMILY3 = FAMILIES[2];
066
067  private Configuration conf;
068
069  private HRegion initHRegion(String callingMethod, Configuration conf) throws IOException {
070    int i=0;
071    HTableDescriptor htd = new HTableDescriptor(TABLENAME);
072    for (byte[] family : FAMILIES) {
073      HColumnDescriptor hcd = new HColumnDescriptor(family);
074      // even column families are going to have compacted memstore
075      if(i%2 == 0) {
076        hcd.setInMemoryCompaction(MemoryCompactionPolicy.valueOf(
077            conf.get(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY)));
078      } else {
079        hcd.setInMemoryCompaction(MemoryCompactionPolicy.NONE);
080      }
081      htd.addFamily(hcd);
082      i++;
083    }
084
085    HRegionInfo info = new HRegionInfo(TABLENAME, null, null, false);
086    Path path = new Path(DIR, callingMethod);
087    return HBaseTestingUtility.createRegionAndWAL(info, path, conf, htd);
088  }
089
090  // A helper function to create puts.
091  private Put createPut(int familyNum, int putNum) {
092    byte[] qf  = Bytes.toBytes("q" + familyNum);
093    byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum);
094    byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum);
095    Put p = new Put(row);
096    p.addColumn(FAMILIES[familyNum - 1], qf, val);
097    return p;
098  }
099
100  // A helper function to create double puts, so something can be compacted later.
101  private Put createDoublePut(int familyNum, int putNum) {
102    byte[] qf  = Bytes.toBytes("q" + familyNum);
103    byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum);
104    byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum);
105    Put p = new Put(row);
106    // add twice with different timestamps
107    p.addColumn(FAMILIES[familyNum - 1], qf, 10, val);
108    p.addColumn(FAMILIES[familyNum - 1], qf, 20, val);
109    return p;
110  }
111
112  // A helper function to create gets.
113  private Get createGet(int familyNum, int putNum) {
114    byte[] row = Bytes.toBytes("row" + familyNum + "-" + putNum);
115    return new Get(row);
116  }
117
118  private void verifyInMemoryFlushSize(Region region) {
119    assertEquals(
120      ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).getInmemoryFlushSize(),
121      ((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore).getInmemoryFlushSize());
122  }
123
124  // A helper function to verify edits.
125  void verifyEdit(int familyNum, int putNum, Table table) throws IOException {
126    Result r = table.get(createGet(familyNum, putNum));
127    byte[] family = FAMILIES[familyNum - 1];
128    byte[] qf = Bytes.toBytes("q" + familyNum);
129    byte[] val = Bytes.toBytes("val" + familyNum + "-" + putNum);
130    assertNotNull(("Missing Put#" + putNum + " for CF# " + familyNum), r.getFamilyMap(family));
131    assertNotNull(("Missing Put#" + putNum + " for CF# " + familyNum),
132      r.getFamilyMap(family).get(qf));
133    assertTrue(("Incorrect value for Put#" + putNum + " for CF# " + familyNum),
134      Arrays.equals(r.getFamilyMap(family).get(qf), val));
135  }
136
137  @Before
138  public void setup() {
139    conf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
140    conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY,
141        FlushNonSloppyStoresFirstPolicy.class.getName());
142    conf.setDouble(CompactingMemStore.IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 0.5);
143  }
144
145  @Test
146  public void testSelectiveFlushWithEager() throws IOException {
147    // Set up the configuration
148    conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 300 * 1024);
149    conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 75 * 1024);
150    // set memstore to do data compaction
151    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
152        String.valueOf(MemoryCompactionPolicy.EAGER));
153
154    // Intialize the region
155    HRegion region = initHRegion("testSelectiveFlushWithEager", conf);
156    verifyInMemoryFlushSize(region);
157    // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
158    for (int i = 1; i <= 1200; i++) {
159      region.put(createPut(1, i));        // compacted memstore, all the keys are unique
160
161      if (i <= 100) {
162        region.put(createPut(2, i));
163        if (i <= 50) {
164          // compacted memstore, subject for compaction due to duplications
165          region.put(createDoublePut(3, i));
166        }
167      }
168    }
169
170    // Now add more puts for CF2, so that we only flush CF2 (DefaultMemStore) to disk
171    for (int i = 100; i < 2000; i++) {
172      region.put(createPut(2, i));
173    }
174
175    long totalMemstoreSize = region.getMemStoreDataSize();
176
177    // Find the smallest LSNs for edits wrt to each CF.
178    long smallestSeqCF1PhaseI = region.getOldestSeqIdOfStore(FAMILY1);
179    long smallestSeqCF2PhaseI = region.getOldestSeqIdOfStore(FAMILY2);
180    long smallestSeqCF3PhaseI = region.getOldestSeqIdOfStore(FAMILY3);
181
182    // Find the sizes of the memstores of each CF.
183    MemStoreSize cf1MemstoreSizePhaseI = region.getStore(FAMILY1).getMemStoreSize();
184    MemStoreSize cf2MemstoreSizePhaseI = region.getStore(FAMILY2).getMemStoreSize();
185    MemStoreSize cf3MemstoreSizePhaseI = region.getStore(FAMILY3).getMemStoreSize();
186
187    // Get the overall smallest LSN in the region's memstores.
188    long smallestSeqInRegionCurrentMemstorePhaseI = getWAL(region)
189        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
190
191    String s = "\n\n----------------------------------\n"
192        + "Upon initial insert and before any flush, size of CF1 is:"
193        + cf1MemstoreSizePhaseI + ", is CF1 compacted memstore?:"
194        + region.getStore(FAMILY1).isSloppyMemStore() + ". Size of CF2 is:"
195        + cf2MemstoreSizePhaseI + ", is CF2 compacted memstore?:"
196        + region.getStore(FAMILY2).isSloppyMemStore() + ". Size of CF3 is:"
197        + cf3MemstoreSizePhaseI + ", is CF3 compacted memstore?:"
198        + region.getStore(FAMILY3).isSloppyMemStore() + "\n";
199
200    // The overall smallest LSN in the region's memstores should be the same as
201    // the LSN of the smallest edit in CF1
202    assertEquals(smallestSeqCF1PhaseI, smallestSeqInRegionCurrentMemstorePhaseI);
203
204    // Some other sanity checks.
205    assertTrue(smallestSeqCF1PhaseI < smallestSeqCF2PhaseI);
206    assertTrue(smallestSeqCF2PhaseI < smallestSeqCF3PhaseI);
207    assertTrue(cf1MemstoreSizePhaseI.getDataSize() > 0);
208    assertTrue(cf2MemstoreSizePhaseI.getDataSize() > 0);
209    assertTrue(cf3MemstoreSizePhaseI.getDataSize() > 0);
210
211    // The total memstore size should be the same as the sum of the sizes of
212    // memstores of CF1, CF2 and CF3.
213    String msg = "totalMemstoreSize="+totalMemstoreSize +
214        " cf1MemstoreSizePhaseI="+cf1MemstoreSizePhaseI +
215        " cf2MemstoreSizePhaseI="+cf2MemstoreSizePhaseI +
216        " cf3MemstoreSizePhaseI="+cf3MemstoreSizePhaseI ;
217    assertEquals(msg, totalMemstoreSize, cf1MemstoreSizePhaseI.getDataSize()
218        + cf2MemstoreSizePhaseI.getDataSize() + cf3MemstoreSizePhaseI.getDataSize());
219
220    // Flush!!!!!!!!!!!!!!!!!!!!!!
221    // We have big compacting memstore CF1 and two small memstores:
222    // CF2 (not compacted) and CF3 (compacting)
223    // All together they are above the flush size lower bound.
224    // Since CF1 and CF3 should be flushed to memory (not to disk),
225    // CF2 is going to be flushed to disk.
226    // CF1 - nothing to compact (but flattening), CF3 - should be twice compacted
227    CompactingMemStore cms1 = (CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore;
228    CompactingMemStore cms3 = (CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore;
229    cms1.flushInMemory();
230    cms3.flushInMemory();
231    region.flush(false);
232
233    // Recalculate everything
234    MemStoreSize cf1MemstoreSizePhaseII = region.getStore(FAMILY1).getMemStoreSize();
235    MemStoreSize cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize();
236    MemStoreSize cf3MemstoreSizePhaseII = region.getStore(FAMILY3).getMemStoreSize();
237
238    long smallestSeqInRegionCurrentMemstorePhaseII = getWAL(region)
239        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
240    // Find the smallest LSNs for edits wrt to each CF.
241    long smallestSeqCF1PhaseII = region.getOldestSeqIdOfStore(FAMILY1);
242    long smallestSeqCF2PhaseII = region.getOldestSeqIdOfStore(FAMILY2);
243    long smallestSeqCF3PhaseII = region.getOldestSeqIdOfStore(FAMILY3);
244
245    s = s + "\n----After first flush! CF1 should be flushed to memory, but not compacted.---\n"
246        + "Size of CF1 is:" + cf1MemstoreSizePhaseII + ", size of CF2 is:" + cf2MemstoreSizePhaseII
247        + ", size of CF3 is:" + cf3MemstoreSizePhaseII + "\n";
248
249    // CF1 was flushed to memory, but there is nothing to compact, and CF1 was flattened
250    assertTrue(cf1MemstoreSizePhaseII.getDataSize() == cf1MemstoreSizePhaseI.getDataSize());
251    assertTrue(cf1MemstoreSizePhaseII.getHeapSize() < cf1MemstoreSizePhaseI.getHeapSize());
252
253    // CF2 should become empty
254    assertEquals(0, cf2MemstoreSizePhaseII.getDataSize());
255    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseII.getHeapSize());
256
257    // verify that CF3 was flushed to memory and was compacted (this is approximation check)
258    assertTrue(cf3MemstoreSizePhaseI.getDataSize() > cf3MemstoreSizePhaseII.getDataSize());
259    assertTrue(
260        cf3MemstoreSizePhaseI.getHeapSize() / 2 > cf3MemstoreSizePhaseII.getHeapSize());
261
262    // Now the smallest LSN in the region should be the same as the smallest
263    // LSN in the memstore of CF1.
264    assertEquals(smallestSeqInRegionCurrentMemstorePhaseII, smallestSeqCF1PhaseI);
265
266    // Now add more puts for CF1, so that we also flush CF1 to disk instead of
267    // memory in next flush
268    for (int i = 1200; i < 3000; i++) {
269      region.put(createPut(1, i));
270    }
271
272    s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseII
273        + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseII + ", " +
274        "the smallest sequence in CF2:"
275        + smallestSeqCF2PhaseII +", the smallest sequence in CF3:" + smallestSeqCF3PhaseII + "\n";
276
277    // How much does the CF1 memstore occupy? Will be used later.
278    MemStoreSize cf1MemstoreSizePhaseIII = region.getStore(FAMILY1).getMemStoreSize();
279    long smallestSeqCF1PhaseIII = region.getOldestSeqIdOfStore(FAMILY1);
280
281    s = s + "----After more puts into CF1 its size is:" + cf1MemstoreSizePhaseIII
282        + ", and its sequence is:" + smallestSeqCF1PhaseIII + " ----\n" ;
283
284
285    // Flush!!!!!!!!!!!!!!!!!!!!!!
286    // Flush again, CF1 is flushed to disk
287    // CF2 is flushed to disk, because it is not in-memory compacted memstore
288    // CF3 is flushed empty to memory (actually nothing happens to CF3)
289    region.flush(false);
290
291    // Recalculate everything
292    MemStoreSize cf1MemstoreSizePhaseIV = region.getStore(FAMILY1).getMemStoreSize();
293    MemStoreSize cf2MemstoreSizePhaseIV = region.getStore(FAMILY2).getMemStoreSize();
294    MemStoreSize cf3MemstoreSizePhaseIV = region.getStore(FAMILY3).getMemStoreSize();
295
296    long smallestSeqInRegionCurrentMemstorePhaseIV = getWAL(region)
297        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
298    long smallestSeqCF1PhaseIV = region.getOldestSeqIdOfStore(FAMILY1);
299    long smallestSeqCF2PhaseIV = region.getOldestSeqIdOfStore(FAMILY2);
300    long smallestSeqCF3PhaseIV = region.getOldestSeqIdOfStore(FAMILY3);
301
302    s = s + "----After SECOND FLUSH, CF1 size is:" + cf1MemstoreSizePhaseIV + ", CF2 size is:"
303        + cf2MemstoreSizePhaseIV + " and CF3 size is:" + cf3MemstoreSizePhaseIV
304        + "\n";
305
306    s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIV
307        + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIV + ", " +
308        "the smallest sequence in CF2:"
309        + smallestSeqCF2PhaseIV +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIV
310        + "\n";
311
312    // CF1's pipeline component (inserted before first flush) should be flushed to disk
313    // CF2 should be flushed to disk
314    assertTrue(cf1MemstoreSizePhaseIII.getDataSize() > cf1MemstoreSizePhaseIV.getDataSize());
315    assertEquals(0, cf2MemstoreSizePhaseIV.getDataSize());
316    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseIV.getHeapSize());
317
318    // CF3 shouldn't have been touched.
319    assertEquals(cf3MemstoreSizePhaseIV, cf3MemstoreSizePhaseII);
320
321    // the smallest LSN of CF3 shouldn't change
322    assertEquals(smallestSeqCF3PhaseII, smallestSeqCF3PhaseIV);
323
324    // CF3 should be bottleneck for WAL
325    assertEquals(s, smallestSeqInRegionCurrentMemstorePhaseIV, smallestSeqCF3PhaseIV);
326
327    // Flush!!!!!!!!!!!!!!!!!!!!!!
328    // Trying to clean the existing memstores, CF2 all flushed to disk. The single
329    // memstore segment in the compaction pipeline of CF1 and CF3 should be flushed to disk.
330    region.flush(true);
331
332    // Recalculate everything
333    MemStoreSize cf1MemstoreSizePhaseV = region.getStore(FAMILY1).getMemStoreSize();
334    MemStoreSize cf2MemstoreSizePhaseV = region.getStore(FAMILY2).getMemStoreSize();
335    MemStoreSize cf3MemstoreSizePhaseV = region.getStore(FAMILY3).getMemStoreSize();
336    long smallestSeqInRegionCurrentMemstorePhaseV = getWAL(region)
337        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
338
339    assertEquals(0, cf1MemstoreSizePhaseV.getDataSize());
340    assertEquals(MutableSegment.DEEP_OVERHEAD, cf1MemstoreSizePhaseV.getHeapSize());
341    assertEquals(0, cf2MemstoreSizePhaseV.getDataSize());
342    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseV.getHeapSize());
343    assertEquals(0, cf3MemstoreSizePhaseV.getDataSize());
344    assertEquals(MutableSegment.DEEP_OVERHEAD, cf3MemstoreSizePhaseV.getHeapSize());
345
346    // What happens when we hit the memstore limit, but we are not able to find
347    // any Column Family above the threshold?
348    // In that case, we should flush all the CFs.
349
350    // The memstore limit is 100*1024 and the column family flush threshold is
351    // around 25*1024. We try to just hit the memstore limit with each CF's
352    // memstore being below the CF flush threshold.
353    for (int i = 1; i <= 300; i++) {
354      region.put(createPut(1, i));
355      region.put(createPut(2, i));
356      region.put(createPut(3, i));
357      region.put(createPut(4, i));
358      region.put(createPut(5, i));
359    }
360
361    region.flush(false);
362
363    s = s + "----AFTER THIRD AND FORTH FLUSH, The smallest sequence in region WAL is: "
364        + smallestSeqInRegionCurrentMemstorePhaseV
365        + ". After additional inserts and last flush, the entire region size is:" + region
366        .getMemStoreDataSize()
367        + "\n----------------------------------\n";
368
369    // Since we won't find any CF above the threshold, and hence no specific
370    // store to flush, we should flush all the memstores
371    // Also compacted memstores are flushed to disk.
372    assertEquals(0, region.getMemStoreDataSize());
373    System.out.println(s);
374    HBaseTestingUtility.closeRegionAndWAL(region);
375  }
376
377  /*------------------------------------------------------------------------------*/
378  /* Check the same as above but for index-compaction type of compacting memstore */
379  @Test
380  public void testSelectiveFlushWithIndexCompaction() throws IOException {
381    /*------------------------------------------------------------------------------*/
382    /* SETUP */
383    // Set up the configuration
384    conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 300 * 1024);
385    conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 75 * 1024);
386    conf.setDouble(CompactingMemStore.IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 0.5);
387    // set memstore to index-compaction
388    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
389        String.valueOf(MemoryCompactionPolicy.BASIC));
390
391    // Initialize the region
392    HRegion region = initHRegion("testSelectiveFlushWithIndexCompaction", conf);
393    verifyInMemoryFlushSize(region);
394    /*------------------------------------------------------------------------------*/
395    /* PHASE I - insertions */
396    // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
397    for (int i = 1; i <= 1200; i++) {
398      region.put(createPut(1, i));        // compacted memstore
399      if (i <= 100) {
400        region.put(createPut(2, i));
401        if (i <= 50) {
402          region.put(createDoublePut(3, i)); // subject for in-memory compaction
403        }
404      }
405    }
406    // Now add more puts for CF2, so that we only flush CF2 to disk
407    for (int i = 100; i < 2000; i++) {
408      region.put(createPut(2, i));
409    }
410
411    /*------------------------------------------------------------------------------*/
412    /*------------------------------------------------------------------------------*/
413    /* PHASE I - collect sizes */
414    long totalMemstoreSizePhaseI = region.getMemStoreDataSize();
415    // Find the smallest LSNs for edits wrt to each CF.
416    long smallestSeqCF1PhaseI = region.getOldestSeqIdOfStore(FAMILY1);
417    long smallestSeqCF2PhaseI = region.getOldestSeqIdOfStore(FAMILY2);
418    long smallestSeqCF3PhaseI = region.getOldestSeqIdOfStore(FAMILY3);
419    // Find the sizes of the memstores of each CF.
420    MemStoreSize cf1MemstoreSizePhaseI = region.getStore(FAMILY1).getMemStoreSize();
421    MemStoreSize cf2MemstoreSizePhaseI = region.getStore(FAMILY2).getMemStoreSize();
422    MemStoreSize cf3MemstoreSizePhaseI = region.getStore(FAMILY3).getMemStoreSize();
423    // Get the overall smallest LSN in the region's memstores.
424    long smallestSeqInRegionCurrentMemstorePhaseI = getWAL(region)
425        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
426
427    /*------------------------------------------------------------------------------*/
428    /* PHASE I - validation */
429    // The overall smallest LSN in the region's memstores should be the same as
430    // the LSN of the smallest edit in CF1
431    assertEquals(smallestSeqCF1PhaseI, smallestSeqInRegionCurrentMemstorePhaseI);
432    // Some other sanity checks.
433    assertTrue(smallestSeqCF1PhaseI < smallestSeqCF2PhaseI);
434    assertTrue(smallestSeqCF2PhaseI < smallestSeqCF3PhaseI);
435    assertTrue(cf1MemstoreSizePhaseI.getDataSize() > 0);
436    assertTrue(cf2MemstoreSizePhaseI.getDataSize() > 0);
437    assertTrue(cf3MemstoreSizePhaseI.getDataSize() > 0);
438
439    // The total memstore size should be the same as the sum of the sizes of
440    // memstores of CF1, CF2 and CF3.
441    assertEquals(totalMemstoreSizePhaseI, cf1MemstoreSizePhaseI.getDataSize()
442        + cf2MemstoreSizePhaseI.getDataSize() + cf3MemstoreSizePhaseI.getDataSize());
443
444    /*------------------------------------------------------------------------------*/
445    /* PHASE I - Flush */
446    // First Flush in Test!!!!!!!!!!!!!!!!!!!!!!
447    // CF1, CF2, CF3, all together they are above the flush size lower bound.
448    // Since CF1 and CF3 are compacting, CF2 is going to be flushed to disk.
449    // CF1 and CF3 - flushed to memory and flatten explicitly
450    region.flush(false);
451    CompactingMemStore cms1 = (CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore;
452    CompactingMemStore cms3 = (CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore;
453    cms1.flushInMemory();
454    cms3.flushInMemory();
455
456    // CF3/CF1 should be merged so wait here to be sure the compaction is done
457    while (((CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore)
458        .isMemStoreFlushingInMemory()) {
459      Threads.sleep(10);
460    }
461    while (((CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore)
462        .isMemStoreFlushingInMemory()) {
463      Threads.sleep(10);
464    }
465
466    /*------------------------------------------------------------------------------*/
467    /*------------------------------------------------------------------------------*/
468    /* PHASE II - collect sizes */
469    // Recalculate everything
470    MemStoreSize cf1MemstoreSizePhaseII = region.getStore(FAMILY1).getMemStoreSize();
471    MemStoreSize cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize();
472    MemStoreSize cf3MemstoreSizePhaseII = region.getStore(FAMILY3).getMemStoreSize();
473    long smallestSeqInRegionCurrentMemstorePhaseII = getWAL(region)
474        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
475    // Find the smallest LSNs for edits wrt to each CF.
476    long smallestSeqCF3PhaseII = region.getOldestSeqIdOfStore(FAMILY3);
477    long totalMemstoreSizePhaseII = region.getMemStoreDataSize();
478
479    /*------------------------------------------------------------------------------*/
480    /* PHASE II - validation */
481    // CF1 was flushed to memory, should be flattened and take less space
482    assertEquals(cf1MemstoreSizePhaseII.getDataSize() , cf1MemstoreSizePhaseI.getDataSize());
483    assertTrue(cf1MemstoreSizePhaseII.getHeapSize() < cf1MemstoreSizePhaseI.getHeapSize());
484    // CF2 should become empty
485    assertEquals(0, cf2MemstoreSizePhaseII.getDataSize());
486    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseII.getHeapSize());
487    // verify that CF3 was flushed to memory and was not compacted (this is an approximation check)
488    // if compacted CF# should be at least twice less because its every key was duplicated
489    assertEquals(cf3MemstoreSizePhaseII.getDataSize() , cf3MemstoreSizePhaseI.getDataSize());
490    assertTrue(cf3MemstoreSizePhaseI.getHeapSize() / 2 < cf3MemstoreSizePhaseII.getHeapSize());
491
492    // Now the smallest LSN in the region should be the same as the smallest
493    // LSN in the memstore of CF1.
494    assertEquals(smallestSeqInRegionCurrentMemstorePhaseII, smallestSeqCF1PhaseI);
495    // The total memstore size should be the same as the sum of the sizes of
496    // memstores of CF1, CF2 and CF3. Counting the empty active segments in CF1/2/3 and pipeline
497    // items in CF1/2
498    assertEquals(totalMemstoreSizePhaseII, cf1MemstoreSizePhaseII.getDataSize()
499        + cf2MemstoreSizePhaseII.getDataSize() + cf3MemstoreSizePhaseII.getDataSize());
500
501    /*------------------------------------------------------------------------------*/
502    /*------------------------------------------------------------------------------*/
503    /* PHASE III - insertions */
504    // Now add more puts for CF1, so that we also flush CF1 to disk instead of
505    // memory in next flush. This is causing the CF! to be flushed to memory twice.
506    for (int i = 1200; i < 8000; i++) {
507      region.put(createPut(1, i));
508    }
509
510    // CF1 should be flatten and merged so wait here to be sure the compaction is done
511    while (((CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore)
512        .isMemStoreFlushingInMemory()) {
513      Threads.sleep(10);
514    }
515
516    /*------------------------------------------------------------------------------*/
517    /* PHASE III - collect sizes */
518    // How much does the CF1 memstore occupy now? Will be used later.
519    MemStoreSize cf1MemstoreSizePhaseIII = region.getStore(FAMILY1).getMemStoreSize();
520    long totalMemstoreSizePhaseIII = region.getMemStoreDataSize();
521
522    /*------------------------------------------------------------------------------*/
523    /* PHASE III - validation */
524    // The total memstore size should be the same as the sum of the sizes of
525    // memstores of CF1, CF2 and CF3. Counting the empty active segments in CF1/2/3 and pipeline
526    // items in CF1/2
527    assertEquals(totalMemstoreSizePhaseIII, cf1MemstoreSizePhaseIII.getDataSize()
528        + cf2MemstoreSizePhaseII.getDataSize() + cf3MemstoreSizePhaseII.getDataSize());
529
530    /*------------------------------------------------------------------------------*/
531    /* PHASE III - Flush */
532    // Second Flush in Test!!!!!!!!!!!!!!!!!!!!!!
533    // CF1 is flushed to disk, but not entirely emptied.
534    // CF2 was and remained empty, same way nothing happens to CF3
535    region.flush(false);
536
537    /*------------------------------------------------------------------------------*/
538    /*------------------------------------------------------------------------------*/
539    /* PHASE IV - collect sizes */
540    // Recalculate everything
541    MemStoreSize cf1MemstoreSizePhaseIV = region.getStore(FAMILY1).getMemStoreSize();
542    MemStoreSize cf2MemstoreSizePhaseIV = region.getStore(FAMILY2).getMemStoreSize();
543    MemStoreSize cf3MemstoreSizePhaseIV = region.getStore(FAMILY3).getMemStoreSize();
544    long smallestSeqInRegionCurrentMemstorePhaseIV = getWAL(region)
545        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
546    long smallestSeqCF3PhaseIV = region.getOldestSeqIdOfStore(FAMILY3);
547
548    /*------------------------------------------------------------------------------*/
549    /* PHASE IV - validation */
550    // CF1's biggest pipeline component (inserted before first flush) should be flushed to disk
551    // CF2 should remain empty
552    assertTrue(cf1MemstoreSizePhaseIII.getDataSize() > cf1MemstoreSizePhaseIV.getDataSize());
553    assertEquals(0, cf2MemstoreSizePhaseIV.getDataSize());
554    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseIV.getHeapSize());
555    // CF3 shouldn't have been touched.
556    assertEquals(cf3MemstoreSizePhaseIV, cf3MemstoreSizePhaseII);
557    // the smallest LSN of CF3 shouldn't change
558    assertEquals(smallestSeqCF3PhaseII, smallestSeqCF3PhaseIV);
559    // CF3 should be bottleneck for WAL
560    assertEquals(smallestSeqInRegionCurrentMemstorePhaseIV, smallestSeqCF3PhaseIV);
561
562    /*------------------------------------------------------------------------------*/
563    /* PHASE IV - Flush */
564    // Third Flush in Test!!!!!!!!!!!!!!!!!!!!!!
565    // Force flush to disk on all memstores (flush parameter true).
566    // CF1/CF3 all flushed to disk. Note that active sets of CF1 and CF3 are empty
567    region.flush(true);
568
569    /*------------------------------------------------------------------------------*/
570    /*------------------------------------------------------------------------------*/
571    /* PHASE V - collect sizes */
572    // Recalculate everything
573    MemStoreSize cf1MemstoreSizePhaseV = region.getStore(FAMILY1).getMemStoreSize();
574    MemStoreSize cf2MemstoreSizePhaseV = region.getStore(FAMILY2).getMemStoreSize();
575    MemStoreSize cf3MemstoreSizePhaseV = region.getStore(FAMILY3).getMemStoreSize();
576    long smallestSeqInRegionCurrentMemstorePhaseV = getWAL(region)
577        .getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
578    long totalMemstoreSizePhaseV = region.getMemStoreDataSize();
579
580    /*------------------------------------------------------------------------------*/
581    /* PHASE V - validation */
582    assertEquals(0, cf1MemstoreSizePhaseV.getDataSize());
583    assertEquals(MutableSegment.DEEP_OVERHEAD, cf1MemstoreSizePhaseV.getHeapSize());
584    assertEquals(0, cf2MemstoreSizePhaseV.getDataSize());
585    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseV.getHeapSize());
586    assertEquals(0, cf3MemstoreSizePhaseV.getDataSize());
587    assertEquals(MutableSegment.DEEP_OVERHEAD, cf3MemstoreSizePhaseV.getHeapSize());
588    // The total memstores size should be empty
589    assertEquals(0, totalMemstoreSizePhaseV);
590    // Because there is nothing in any memstore the WAL's LSN should be -1
591    assertEquals(HConstants.NO_SEQNUM, smallestSeqInRegionCurrentMemstorePhaseV);
592
593    // What happens when we hit the memstore limit, but we are not able to find
594    // any Column Family above the threshold?
595    // In that case, we should flush all the CFs.
596
597    /*------------------------------------------------------------------------------*/
598    /*------------------------------------------------------------------------------*/
599    /* PHASE VI - insertions */
600    // The memstore limit is 200*1024 and the column family flush threshold is
601    // around 50*1024. We try to just hit the memstore limit with each CF's
602    // memstore being below the CF flush threshold.
603    for (int i = 1; i <= 300; i++) {
604      region.put(createPut(1, i));
605      region.put(createPut(2, i));
606      region.put(createPut(3, i));
607      region.put(createPut(4, i));
608      region.put(createPut(5, i));
609    }
610
611    MemStoreSize cf1ActiveSizePhaseVI = region.getStore(FAMILY1).getMemStoreSize();
612    MemStoreSize cf3ActiveSizePhaseVI = region.getStore(FAMILY3).getMemStoreSize();
613    MemStoreSize cf5ActiveSizePhaseVI = region.getStore(FAMILIES[4]).getMemStoreSize();
614
615    /*------------------------------------------------------------------------------*/
616    /* PHASE VI - Flush */
617    // Fourth Flush in Test!!!!!!!!!!!!!!!!!!!!!!
618    // None among compacting memstores was flushed to memory due to previous puts.
619    // But is going to be moved to pipeline and flatten due to the flush.
620    region.flush(false);
621    // Since we won't find any CF above the threshold, and hence no specific
622    // store to flush, we should flush all the memstores
623    // Also compacted memstores are flushed to disk, but not entirely emptied
624    MemStoreSize cf1ActiveSizePhaseVII = region.getStore(FAMILY1).getMemStoreSize();
625    MemStoreSize cf3ActiveSizePhaseVII = region.getStore(FAMILY3).getMemStoreSize();
626    MemStoreSize cf5ActiveSizePhaseVII = region.getStore(FAMILIES[4]).getMemStoreSize();
627
628    assertTrue(cf1ActiveSizePhaseVII.getDataSize() < cf1ActiveSizePhaseVI.getDataSize());
629    assertTrue(cf3ActiveSizePhaseVII.getDataSize() < cf3ActiveSizePhaseVI.getDataSize());
630    assertTrue(cf5ActiveSizePhaseVII.getDataSize() < cf5ActiveSizePhaseVI.getDataSize());
631
632    HBaseTestingUtility.closeRegionAndWAL(region);
633  }
634
635  @Test
636  public void testSelectiveFlushAndWALinDataCompaction() throws IOException {
637    // Set up the configuration
638    conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 300 * 1024);
639    conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 75 * 1024);
640    // set memstore to do data compaction and not to use the speculative scan
641    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
642        String.valueOf(MemoryCompactionPolicy.EAGER));
643
644    // Intialize the HRegion
645    HRegion region = initHRegion("testSelectiveFlushAndWALinDataCompaction", conf);
646    verifyInMemoryFlushSize(region);
647    // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
648    for (int i = 1; i <= 1200; i++) {
649      region.put(createPut(1, i));
650      if (i <= 100) {
651        region.put(createPut(2, i));
652        if (i <= 50) {
653          region.put(createPut(3, i));
654        }
655      }
656    }
657    // Now add more puts for CF2, so that we only flush CF2 to disk
658    for (int i = 100; i < 2000; i++) {
659      region.put(createPut(2, i));
660    }
661
662    // in this test check the non-composite snapshot - flashing only tail of the pipeline
663    ((CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore).setCompositeSnapshot(false);
664    ((CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore).setCompositeSnapshot(false);
665
666    long totalMemstoreSize = region.getMemStoreDataSize();
667
668    // Find the sizes of the memstores of each CF.
669    MemStoreSize cf1MemstoreSizePhaseI = region.getStore(FAMILY1).getMemStoreSize();
670    MemStoreSize cf2MemstoreSizePhaseI = region.getStore(FAMILY2).getMemStoreSize();
671    MemStoreSize cf3MemstoreSizePhaseI = region.getStore(FAMILY3).getMemStoreSize();
672
673    // Some other sanity checks.
674    assertTrue(cf1MemstoreSizePhaseI.getDataSize() > 0);
675    assertTrue(cf2MemstoreSizePhaseI.getDataSize() > 0);
676    assertTrue(cf3MemstoreSizePhaseI.getDataSize() > 0);
677
678    // The total memstore size should be the same as the sum of the sizes of
679    // memstores of CF1, CF2 and CF3.
680    String msg = "totalMemstoreSize="+totalMemstoreSize +
681        " DefaultMemStore.DEEP_OVERHEAD="+DefaultMemStore.DEEP_OVERHEAD +
682        " cf1MemstoreSizePhaseI="+cf1MemstoreSizePhaseI +
683        " cf2MemstoreSizePhaseI="+cf2MemstoreSizePhaseI +
684        " cf3MemstoreSizePhaseI="+cf3MemstoreSizePhaseI ;
685    assertEquals(msg, totalMemstoreSize, cf1MemstoreSizePhaseI.getDataSize()
686        + cf2MemstoreSizePhaseI.getDataSize() + cf3MemstoreSizePhaseI.getDataSize());
687
688    // Flush!
689    CompactingMemStore cms1 = (CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore;
690    CompactingMemStore cms3 = (CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore;
691    cms1.flushInMemory();
692    cms3.flushInMemory();
693    region.flush(false);
694
695    MemStoreSize cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize();
696
697    long smallestSeqInRegionCurrentMemstorePhaseII =
698        region.getWAL().getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
699    long smallestSeqCF1PhaseII = region.getOldestSeqIdOfStore(FAMILY1);
700    long smallestSeqCF2PhaseII = region.getOldestSeqIdOfStore(FAMILY2);
701    long smallestSeqCF3PhaseII = region.getOldestSeqIdOfStore(FAMILY3);
702
703    // CF2 should have been cleared
704    assertEquals(0, cf2MemstoreSizePhaseII.getDataSize());
705    assertEquals(MutableSegment.DEEP_OVERHEAD, cf2MemstoreSizePhaseII.getHeapSize());
706
707    String s = "\n\n----------------------------------\n"
708        + "Upon initial insert and flush, LSN of CF1 is:"
709        + smallestSeqCF1PhaseII + ". LSN of CF2 is:"
710        + smallestSeqCF2PhaseII + ". LSN of CF3 is:"
711        + smallestSeqCF3PhaseII + ", smallestSeqInRegionCurrentMemstore:"
712        + smallestSeqInRegionCurrentMemstorePhaseII + "\n";
713
714    // Add same entries to compact them later
715    for (int i = 1; i <= 1200; i++) {
716      region.put(createPut(1, i));
717      if (i <= 100) {
718        region.put(createPut(2, i));
719        if (i <= 50) {
720          region.put(createPut(3, i));
721        }
722      }
723    }
724    // Now add more puts for CF2, so that we only flush CF2 to disk
725    for (int i = 100; i < 2000; i++) {
726      region.put(createPut(2, i));
727    }
728
729    long smallestSeqInRegionCurrentMemstorePhaseIII =
730        region.getWAL().getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
731    long smallestSeqCF1PhaseIII = region.getOldestSeqIdOfStore(FAMILY1);
732    long smallestSeqCF2PhaseIII = region.getOldestSeqIdOfStore(FAMILY2);
733    long smallestSeqCF3PhaseIII = region.getOldestSeqIdOfStore(FAMILY3);
734
735    s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIII
736        + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIII + ", " +
737        "the smallest sequence in CF2:"
738        + smallestSeqCF2PhaseIII +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIII + "\n";
739
740    // Flush!
741    cms1 = (CompactingMemStore) ((HStore) region.getStore(FAMILY1)).memstore;
742    cms3 = (CompactingMemStore) ((HStore) region.getStore(FAMILY3)).memstore;
743    cms1.flushInMemory();
744    cms3.flushInMemory();
745    region.flush(false);
746
747    long smallestSeqInRegionCurrentMemstorePhaseIV =
748        region.getWAL().getEarliestMemStoreSeqNum(region.getRegionInfo().getEncodedNameAsBytes());
749    long smallestSeqCF1PhaseIV = region.getOldestSeqIdOfStore(FAMILY1);
750    long smallestSeqCF2PhaseIV = region.getOldestSeqIdOfStore(FAMILY2);
751    long smallestSeqCF3PhaseIV = region.getOldestSeqIdOfStore(FAMILY3);
752
753    s = s + "The smallest sequence in region WAL is: " + smallestSeqInRegionCurrentMemstorePhaseIV
754        + ", the smallest sequence in CF1:" + smallestSeqCF1PhaseIV + ", " +
755        "the smallest sequence in CF2:"
756        + smallestSeqCF2PhaseIV +", the smallest sequence in CF3:" + smallestSeqCF3PhaseIV + "\n";
757
758    // now check that the LSN of the entire WAL, of CF1 and of CF3 has progressed due to compaction
759    assertTrue(s, smallestSeqInRegionCurrentMemstorePhaseIV >
760        smallestSeqInRegionCurrentMemstorePhaseIII);
761    assertTrue(smallestSeqCF1PhaseIV > smallestSeqCF1PhaseIII);
762    assertTrue(smallestSeqCF3PhaseIV > smallestSeqCF3PhaseIII);
763
764    HBaseTestingUtility.closeRegionAndWAL(region);
765  }
766
767  @Test
768  public void testSelectiveFlushWithBasicAndMerge() throws IOException {
769    // Set up the configuration
770    conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 300 * 1024);
771    conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 75 * 1024);
772    conf.setDouble(CompactingMemStore.IN_MEMORY_FLUSH_THRESHOLD_FACTOR_KEY, 0.8);
773    // set memstore to do index compaction with merge
774    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
775        String.valueOf(MemoryCompactionPolicy.BASIC));
776    // length of pipeline that requires merge
777    conf.setInt(MemStoreCompactionStrategy.COMPACTING_MEMSTORE_THRESHOLD_KEY, 1);
778
779    // Intialize the HRegion
780    HRegion region = initHRegion("testSelectiveFlushWithBasicAndMerge", conf);
781    verifyInMemoryFlushSize(region);
782    // Add 1200 entries for CF1 (CompactingMemStore), 100 for CF2 (DefaultMemStore) and 50 for CF3
783    for (int i = 1; i <= 1200; i++) {
784      region.put(createPut(1, i));
785      if (i <= 100) {
786        region.put(createPut(2, i));
787        if (i <= 50) {
788          region.put(createPut(3, i));
789        }
790      }
791    }
792    // Now put more entries to CF2
793    for (int i = 100; i < 2000; i++) {
794      region.put(createPut(2, i));
795    }
796
797    long totalMemstoreSize = region.getMemStoreDataSize();
798
799    // test in-memory flashing into CAM here
800    ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).setIndexType(
801        CompactingMemStore.IndexType.ARRAY_MAP);
802    ((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore).setIndexType(
803        CompactingMemStore.IndexType.ARRAY_MAP);
804
805    // Find the sizes of the memstores of each CF.
806    MemStoreSize cf1MemstoreSizePhaseI = region.getStore(FAMILY1).getMemStoreSize();
807    MemStoreSize cf2MemstoreSizePhaseI = region.getStore(FAMILY2).getMemStoreSize();
808    MemStoreSize cf3MemstoreSizePhaseI = region.getStore(FAMILY3).getMemStoreSize();
809
810    // Some other sanity checks.
811    assertTrue(cf1MemstoreSizePhaseI.getDataSize() > 0);
812    assertTrue(cf2MemstoreSizePhaseI.getDataSize() > 0);
813    assertTrue(cf3MemstoreSizePhaseI.getDataSize() > 0);
814
815    // The total memstore size should be the same as the sum of the sizes of
816    // memstores of CF1, CF2 and CF3.
817    assertEquals(totalMemstoreSize,
818        cf1MemstoreSizePhaseI.getDataSize() + cf2MemstoreSizePhaseI.getDataSize()
819            + cf3MemstoreSizePhaseI.getDataSize());
820
821    // Initiate in-memory Flush!
822    ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).flushInMemory();
823    ((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore).flushInMemory();
824    // CF1 and CF3 should be flatten and merged so wait here to be sure the merge is done
825    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore)
826        .isMemStoreFlushingInMemory()) {
827      Threads.sleep(10);
828    }
829    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore)
830        .isMemStoreFlushingInMemory()) {
831      Threads.sleep(10);
832    }
833
834    // Flush-to-disk! CF2 only should be flushed
835    region.flush(false);
836
837    MemStoreSize cf1MemstoreSizePhaseII = region.getStore(FAMILY1).getMemStoreSize();
838    MemStoreSize cf2MemstoreSizePhaseII = region.getStore(FAMILY2).getMemStoreSize();
839    MemStoreSize cf3MemstoreSizePhaseII = region.getStore(FAMILY3).getMemStoreSize();
840
841    // CF1 should be flushed in memory and just flattened, so CF1 heap overhead should be smaller
842    assertTrue(cf1MemstoreSizePhaseI.getHeapSize() > cf1MemstoreSizePhaseII.getHeapSize());
843    // CF1 should be flushed in memory and just flattened, so CF1 data size should remain the same
844    assertEquals(cf1MemstoreSizePhaseI.getDataSize(), cf1MemstoreSizePhaseII.getDataSize());
845    // CF2 should have been cleared
846    assertEquals(0, cf2MemstoreSizePhaseII.getDataSize());
847
848    // Add the same amount of entries to see the merging
849    for (int i = 1; i <= 1200; i++) {
850      region.put(createPut(1, i));
851      if (i <= 100) {
852        region.put(createPut(2, i));
853        if (i <= 50) {
854          region.put(createPut(3, i));
855        }
856      }
857    }
858    // Now add more puts for CF2, so that we only flush CF2 to disk
859    for (int i = 100; i < 2000; i++) {
860      region.put(createPut(2, i));
861    }
862
863    MemStoreSize cf1MemstoreSizePhaseIII = region.getStore(FAMILY1).getMemStoreSize();
864
865    // Flush in memory!
866    ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).flushInMemory();
867    ((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore).flushInMemory();
868    // CF1 and CF3 should be merged so wait here to be sure the merge is done
869    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore)
870        .isMemStoreFlushingInMemory()) {
871      Threads.sleep(10);
872    }
873    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore)
874        .isMemStoreFlushingInMemory()) {
875      Threads.sleep(10);
876    }
877    region.flush(false);
878
879    MemStoreSize cf1MemstoreSizePhaseIV = region.getStore(FAMILY1).getMemStoreSize();
880    MemStoreSize cf2MemstoreSizePhaseIV = region.getStore(FAMILY2).getMemStoreSize();
881
882    assertEquals(2*cf1MemstoreSizePhaseI.getDataSize(), cf1MemstoreSizePhaseIV.getDataSize());
883    // the decrease in the heap size due to usage of CellArrayMap instead of CSLM
884    // should be the same in flattening and in merge (first and second in-memory-flush)
885    // but in phase 1 we do not yet have immutable segment
886    assertEquals(
887        cf1MemstoreSizePhaseI.getHeapSize() - cf1MemstoreSizePhaseII.getHeapSize(),
888        cf1MemstoreSizePhaseIII.getHeapSize() - cf1MemstoreSizePhaseIV.getHeapSize()
889            - CellArrayImmutableSegment.DEEP_OVERHEAD_CAM);
890    assertEquals(3, // active, one in pipeline, snapshot
891        ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).getSegments().size());
892    // CF2 should have been cleared
893    assertEquals("\n<<< DEBUG: The data--heap sizes of stores before/after first flushes,"
894            + " CF1: " + cf1MemstoreSizePhaseI.getDataSize() + "/" + cf1MemstoreSizePhaseII
895            .getDataSize() + "--" + cf1MemstoreSizePhaseI.getHeapSize() + "/" + cf1MemstoreSizePhaseII
896            .getHeapSize() + ", CF2: " + cf2MemstoreSizePhaseI.getDataSize() + "/"
897            + cf2MemstoreSizePhaseII.getDataSize() + "--" + cf2MemstoreSizePhaseI.getHeapSize() + "/"
898            + cf2MemstoreSizePhaseII.getHeapSize() + ", CF3: " + cf3MemstoreSizePhaseI.getDataSize()
899            + "/" + cf3MemstoreSizePhaseII.getDataSize() + "--" + cf3MemstoreSizePhaseI.getHeapSize()
900            + "/" + cf3MemstoreSizePhaseII.getHeapSize() + "\n<<< AND before/after second flushes "
901            + " CF1: " + cf1MemstoreSizePhaseIII.getDataSize() + "/" + cf1MemstoreSizePhaseIV
902            .getDataSize() + "--" + cf1MemstoreSizePhaseIII.getHeapSize() + "/" + cf1MemstoreSizePhaseIV
903            .getHeapSize() + "\n",
904        0, cf2MemstoreSizePhaseIV.getDataSize());
905
906    HBaseTestingUtility.closeRegionAndWAL(region);
907  }
908
909  // should end in 300 seconds (5 minutes)
910  @Test
911  public void testStressFlushAndWALinIndexCompaction() throws IOException {
912    // Set up the configuration
913    conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 600 * 1024);
914    conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
915        200 * 1024);
916    // set memstore to do data compaction and not to use the speculative scan
917    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
918        String.valueOf(MemoryCompactionPolicy.BASIC));
919
920    // Successfully initialize the HRegion
921    HRegion region = initHRegion("testSelectiveFlushAndWALinDataCompaction", conf);
922    verifyInMemoryFlushSize(region);
923    Thread[] threads = new Thread[25];
924    for (int i = 0; i < threads.length; i++) {
925      int id = i * 10000;
926      ConcurrentPutRunnable runnable = new ConcurrentPutRunnable(region, id);
927      threads[i] = new Thread(runnable);
928      threads[i].start();
929    }
930    Threads.sleep(10000); // let other threads start
931    region.flush(true); // enforce flush of everything TO DISK while there are still ongoing puts
932    Threads.sleep(10000); // let other threads continue
933    region.flush(true); // enforce flush of everything TO DISK while there are still ongoing puts
934
935    ((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore).flushInMemory();
936    ((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore).flushInMemory();
937    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY1)).memstore)
938        .isMemStoreFlushingInMemory()) {
939      Threads.sleep(10);
940    }
941    while (((CompactingMemStore) ((HStore)region.getStore(FAMILY3)).memstore)
942        .isMemStoreFlushingInMemory()) {
943      Threads.sleep(10);
944    }
945
946    for (int i = 0; i < threads.length; i++) {
947      try {
948        threads[i].join();
949      } catch (InterruptedException e) {
950        e.printStackTrace();
951      }
952    }
953  }
954
955  /**
956   * The in-memory-flusher thread performs the flush asynchronously. There is at most one thread per
957   * memstore instance. It takes the updatesLock exclusively, pushes active into the pipeline,
958   * releases updatesLock and compacts the pipeline.
959   */
960  private class ConcurrentPutRunnable implements Runnable {
961    private final HRegion stressedRegion;
962    private final int startNumber;
963
964    ConcurrentPutRunnable(HRegion r, int i) {
965      this.stressedRegion = r;
966      this.startNumber = i;
967    }
968
969    @Override
970    public void run() {
971
972      try {
973        int dummy = startNumber / 10000;
974        System.out.print("Thread " + dummy + " with start number " + startNumber + " starts\n");
975        // Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
976        for (int i = startNumber; i <= startNumber + 3000; i++) {
977          stressedRegion.put(createPut(1, i));
978          if (i <= startNumber + 2000) {
979            stressedRegion.put(createPut(2, i));
980            if (i <= startNumber + 1000) {
981              stressedRegion.put(createPut(3, i));
982            }
983          }
984        }
985        System.out.print("Thread with start number " + startNumber + " continues to more puts\n");
986        // Now add more puts for CF2, so that we only flush CF2 to disk
987        for (int i = startNumber + 3000; i < startNumber + 5000; i++) {
988          stressedRegion.put(createPut(2, i));
989        }
990        // And add more puts for CF1
991        for (int i = startNumber + 5000; i < startNumber + 7000; i++) {
992          stressedRegion.put(createPut(1, i));
993        }
994        System.out.print("Thread with start number " + startNumber + " flushes\n");
995        // flush (IN MEMORY) one of the stores (each thread flushes different store)
996        // and wait till the flush and the following action are done
997        if (startNumber == 0) {
998          ((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY1)).memstore)
999              .flushInMemory();
1000          while (((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY1)).memstore)
1001              .isMemStoreFlushingInMemory()) {
1002            Threads.sleep(10);
1003          }
1004        }
1005        if (startNumber == 10000) {
1006          ((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY2)).memstore).flushInMemory();
1007          while (((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY2)).memstore)
1008              .isMemStoreFlushingInMemory()) {
1009            Threads.sleep(10);
1010          }
1011        }
1012        if (startNumber == 20000) {
1013          ((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY3)).memstore).flushInMemory();
1014          while (((CompactingMemStore) ((HStore) stressedRegion.getStore(FAMILY3)).memstore)
1015              .isMemStoreFlushingInMemory()) {
1016            Threads.sleep(10);
1017          }
1018        }
1019        System.out.print("Thread with start number " + startNumber + " finishes\n");
1020      } catch (IOException e) {
1021        assert false;
1022      }
1023    }
1024  }
1025
1026  private WAL getWAL(Region region) {
1027    return ((HRegion)region).getWAL();
1028  }
1029}