/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.HadoopShims;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests;
import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Simple test for {@link HFileOutputFormat2}.
 * Sets up and runs a MapReduce job that writes HFile output.
 * Creates a few inner classes to implement splits and an inputformat that
 * emits keys and values like those of {@link PerformanceEvaluation}.
 */
@Category({VerySlowMapReduceTests.class, LargeTests.class})
// TODO: Remove this in 3.0
public class TestHFileOutputFormat2 {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
      HBaseClassTestRule.forClass(TestHFileOutputFormat2.class);

  private final static int ROWSPERSPLIT = 1024;

  public static final byte[] FAMILY_NAME = TestHRegionFileSystem.FAMILY_NAME;
  private static final byte[][] FAMILIES = {
    Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")), Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))};
  private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2",
      "TestTable3").map(TableName::valueOf).toArray(TableName[]::new);

  private HBaseTestingUtility util = new HBaseTestingUtility();

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileOutputFormat2.class);

  /**
   * Simple mapper that makes KeyValue output.
   */
  static class RandomKVGeneratingMapper
      extends Mapper<NullWritable, NullWritable, ImmutableBytesWritable, Cell> {

    private int keyLength;
    private static final int KEYLEN_DEFAULT = 10;
    private static final String KEYLEN_CONF = "randomkv.key.length";

    private int valLength;
    private static final int VALLEN_DEFAULT = 10;
    private static final String VALLEN_CONF = "randomkv.val.length";
    private static final byte[] QUALIFIER = Bytes.toBytes("data");
    private boolean multiTableMapper = false;
    private TableName[] tables = null;

    @Override
    protected void setup(Context context) throws IOException,
        InterruptedException {
      super.setup(context);

      Configuration conf = context.getConfiguration();
      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
      multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
          false);
      if (multiTableMapper) {
        tables = TABLE_NAMES;
      } else {
        tables = new TableName[]{TABLE_NAMES[0]};
      }
    }

172
173    @Override
174    protected void map(
175        NullWritable n1, NullWritable n2,
176        Mapper<NullWritable, NullWritable,
177               ImmutableBytesWritable,Cell>.Context context)
178        throws java.io.IOException ,InterruptedException
179    {
180
181      byte keyBytes[] = new byte[keyLength];
182      byte valBytes[] = new byte[valLength];
183
184      int taskId = context.getTaskAttemptID().getTaskID().getId();
185      assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
186      Random random = new Random();
187      byte[] key;
188      for (int j = 0; j < tables.length; ++j) {
189        for (int i = 0; i < ROWSPERSPLIT; i++) {
190          random.nextBytes(keyBytes);
191          // Ensure that unique tasks generate unique keys
192          keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
193          random.nextBytes(valBytes);
194          key = keyBytes;
195          if (multiTableMapper) {
196            key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
197          }
198
199          for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
200            Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes);
201            context.write(new ImmutableBytesWritable(key), kv);
202          }
203        }
204      }
205    }
206  }
207
208  /**
209   * Simple mapper that makes Put output.
210   */
211  static class RandomPutGeneratingMapper
212      extends Mapper<NullWritable, NullWritable,
213                 ImmutableBytesWritable, Put> {
214
215    private int keyLength;
216    private static final int KEYLEN_DEFAULT = 10;
217    private static final String KEYLEN_CONF = "randomkv.key.length";
218
219    private int valLength;
220    private static final int VALLEN_DEFAULT = 10;
221    private static final String VALLEN_CONF = "randomkv.val.length";
222    private static final byte[] QUALIFIER = Bytes.toBytes("data");
223    private boolean multiTableMapper = false;
224    private TableName[] tables = null;
225
226    @Override
227    protected void setup(Context context) throws IOException,
228            InterruptedException {
229      super.setup(context);
230
231      Configuration conf = context.getConfiguration();
232      keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT);
233      valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT);
234      multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY,
235              false);
236      if (multiTableMapper) {
237        tables = TABLE_NAMES;
238      } else {
239        tables = new TableName[]{TABLE_NAMES[0]};
240      }
241    }
242
243    @Override
244    protected void map(
245            NullWritable n1, NullWritable n2,
246            Mapper<NullWritable, NullWritable,
247                    ImmutableBytesWritable, Put>.Context context)
248            throws java.io.IOException, InterruptedException {
249
250      byte keyBytes[] = new byte[keyLength];
251      byte valBytes[] = new byte[valLength];
252
253      int taskId = context.getTaskAttemptID().getTaskID().getId();
254      assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!";
255
256      Random random = new Random();
257      byte[] key;
258      for (int j = 0; j < tables.length; ++j) {
259        for (int i = 0; i < ROWSPERSPLIT; i++) {
260          random.nextBytes(keyBytes);
261          // Ensure that unique tasks generate unique keys
262          keyBytes[keyLength - 1] = (byte) (taskId & 0xFF);
263          random.nextBytes(valBytes);
264          key = keyBytes;
265          if (multiTableMapper) {
266            key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes);
267          }
268
269          for (byte[] family : TestHFileOutputFormat2.FAMILIES) {
270            Put p = new Put(keyBytes);
271            p.addColumn(family, QUALIFIER, valBytes);
272            // set TTL to very low so that the scan does not return any value
273            p.setTTL(1l);
274            context.write(new ImmutableBytesWritable(key), p);
275          }
276        }
277      }
278    }
279  }
280
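  /**
   * Configures the job with an {@link NMapInputFormat} and the mapper that matches the desired
   * reducer pipeline: {@link RandomPutGeneratingMapper} when a PutSortReducer is used,
   * otherwise {@link RandomKVGeneratingMapper}.
   */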
  private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
    if (putSortReducer) {
      job.setInputFormatClass(NMapInputFormat.class);
      job.setMapperClass(RandomPutGeneratingMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(Put.class);
    } else {
      job.setInputFormatClass(NMapInputFormat.class);
      job.setMapperClass(RandomKVGeneratingMapper.class);
      job.setMapOutputKeyClass(ImmutableBytesWritable.class);
      job.setMapOutputValueClass(KeyValue.class);
    }
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if
   * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}.
   * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a>
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void test_LATEST_TIMESTAMP_isReplaced() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced");
    try {
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      // Test 1.  Pass a KV that has a ts of LATEST_TIMESTAMP.  It should be
      // changed by the call to write.  Check that everything in the kv is the same but the ts.
      KeyValue kv = new KeyValue(b, b, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertFalse(original.equals(kv));
      assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv)));
      assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv)));
      assertNotEquals(original.getTimestamp(), kv.getTimestamp());
      assertNotEquals(HConstants.LATEST_TIMESTAMP, kv.getTimestamp());

      // Test 2. Now test passing a kv that has an explicit ts.  It should not be
      // changed by the call to record write.
      kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertTrue(original.equals(kv));
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

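  /**
   * Creates a {@link TaskAttemptContext} with a fixed attempt id so tests can drive an
   * {@link HFileOutputFormat2} RecordWriter directly, outside of a full MR job.
   */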
  private TaskAttemptContext createTestTaskAttemptContext(final Job job) throws Exception {
    HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class);
    TaskAttemptContext context = hadoop.createTestTaskAttemptContext(
      job, "attempt_201402131733_0001_m_000000_0");
    return context;
  }

  /**
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
      // build a record writer using HFileOutputFormat2
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // Pass two key values with explicit timestamps
      final byte[] b = Bytes.toBytes("b");

      // value 1 with timestamp 2000
      KeyValue kv = new KeyValue(b, b, b, 2000, b);
      KeyValue original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // value 2 with timestamp 1000
      kv = new KeyValue(b, b, b, 1000, b);
      original = kv.clone();
      writer.write(new ImmutableBytesWritable(), kv);
      assertEquals(original, kv);

      // verify that the file has the proper FileInfo.
      writer.close(context);

      // the generated file lives 1 directory down from the attempt directory
      // and is the only file, e.g.
      // _attempt__0000_r_000000_0/b/1979617994050536795
      FileSystem fs = FileSystem.get(conf);
      Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
      FileStatus[] sub1 = fs.listStatus(attemptDirectory);
      FileStatus[] file = fs.listStatus(sub1[0].getPath());

      // open as HFile Reader and pull out TIMERANGE FileInfo.
      HFile.Reader rd =
          HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf);
      Map<byte[], byte[]> finfo = rd.loadFileInfo();
      byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8"));
      assertNotNull(range);

      // unmarshall and check values.
      TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(range);
      LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
      assertEquals(1000, timeRangeTracker.getMin());
      assertEquals(2000, timeRangeTracker.getMax());
      rd.close();
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Run small MR job.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testWritingPEData() throws Exception {
    Configuration conf = util.getConfiguration();
    Path testDir = util.getDataTestDirOnTestFS("testWritingPEData");
    FileSystem fs = testDir.getFileSystem(conf);

    // Set down this value or we OOME in eclipse.
    conf.setInt("mapreduce.task.io.sort.mb", 20);
    // Write a few files.
    conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024);

    Job job = new Job(conf, "testWritingPEData");
    setupRandomGeneratorMapper(job, false);
    // This partitioner doesn't work well for number keys but we use it anyway
    // just to demonstrate how to configure it.
    byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];
    byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT];

    Arrays.fill(startKey, (byte)0);
    Arrays.fill(endKey, (byte)0xff);

    job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
    // Set start and end rows for partitioner.
    SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
    SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setOutputFormatClass(HFileOutputFormat2.class);
    job.setNumReduceTasks(4);
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());

    FileOutputFormat.setOutputPath(job, testDir);
    assertTrue(job.waitForCompletion(false));
    FileStatus[] files = fs.listStatus(testDir);
    assertTrue(files.length > 0);
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into
   * hfile.
   */
  @Test
  public void test_WritingTagData() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
    conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("WritingTagData");
    try {
      conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);
      Job job = new Job(conf);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);
      final byte[] b = Bytes.toBytes("b");

      List<Tag> tags = new ArrayList<>();
      tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
      KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
      writer.write(new ImmutableBytesWritable(), kv);
      writer.close(context);
      writer = null;
      FileSystem fs = dir.getFileSystem(conf);
      RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
      while (iterator.hasNext()) {
        LocatedFileStatus keyFileStatus = iterator.next();
        HFile.Reader reader =
            HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf);
        HFileScanner scanner = reader.getScanner(false, false, false);
        scanner.seekTo();
        Cell cell = scanner.getCell();
        List<Tag> tagsFromCell = PrivateCellUtil.getTags(cell);
        assertTrue(tagsFromCell.size() > 0);
        for (Tag tag : tagsFromCell) {
          assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
        }
      }
    } finally {
      if (writer != null && context != null) writer.close(context);
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testJobConfiguration() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration")
        .toString());
    Job job = new Job(conf);
    job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration"));
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    setupMockStartKeys(regionLocator);
    setupMockTableName(regionLocator);
    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    assertEquals(4, job.getNumReduceTasks());
  }

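  /**
   * Generates {@code numKeys} random region start keys; the first entry is always
   * {@link HConstants#EMPTY_BYTE_ARRAY}, as for the first region of a table.
   */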
  private byte[][] generateRandomStartKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    // first region start key is always empty
    ret[0] = HConstants.EMPTY_BYTE_ARRAY;
    for (int i = 1; i < numKeys; i++) {
      ret[i] =
        PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

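  /**
   * Generates {@code numKeys} random split keys, one per region boundary.
   */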
  private byte[][] generateRandomSplitKeys(int numKeys) {
    Random random = new Random();
    byte[][] ret = new byte[numKeys][];
    for (int i = 0; i < numKeys; i++) {
      ret[i] =
          PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
    }
    return ret;
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoad() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoad\n");
    doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad");
  }

  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithSplit() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n");
    doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit");
  }

  /**
   * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true.
   * This test can only verify the correctness of the original logic when
   * LOCALITY_SENSITIVE_CONF_KEY is set to true. Because MiniHBaseCluster always runs with a
   * single hostname (and different ports), it is not possible to check region locality by
   * comparing region locations with DataNode hostnames. Once MiniHBaseCluster supports an
   * explicit hostnames parameter (just like MiniDFSCluster does), region locality features
   * can be tested more easily.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testMRIncrementalLoadWithLocality() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n");
    doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1");
    doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2");
  }

  @Test
  public void testMRIncrementalLoadWithPutSortReducer() throws Exception {
    LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n");
    doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer");
  }

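  /**
   * Single-table convenience overload of
   * {@link #doIncrementalLoadTest(boolean, boolean, boolean, List)}.
   */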
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
      boolean putSortReducer, String tableStr) throws Exception {
    doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer,
        Arrays.asList(tableStr));
  }

  @Test
  public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception {
    LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n");
    doIncrementalLoadTest(false, false, true,
        Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList()));
  }

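  /**
   * Starts a mini cluster, runs the random-data MR job against the given tables, bulk loads
   * the generated HFiles with {@link LoadIncrementalHFiles}, and verifies row counts, region
   * locality, and that the data survives a region reopen.
   */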
  private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality,
      boolean putSortReducer, List<String> tableStr) throws Exception {
    util = new HBaseTestingUtility();
    Configuration conf = util.getConfiguration();
    conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality);
    int hostCount = 1;
    int regionNum = 5;
    if (shouldKeepLocality) {
      // We should change host count higher than hdfs replica count when MiniHBaseCluster supports
      // explicit hostnames parameter just like MiniDFSCluster does.
      hostCount = 3;
      regionNum = 20;
    }

    String[] hostnames = new String[hostCount];
    for (int i = 0; i < hostCount; ++i) {
      hostnames[i] = "datanode_" + i;
    }
    util.startMiniCluster(1, hostCount, hostnames);

    Map<String, Table> allTables = new HashMap<>(tableStr.size());
    List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size());
    boolean writeMultipleTables = tableStr.size() > 1;
    for (String tableStrSingle : tableStr) {
      byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1);
      TableName tableName = TableName.valueOf(tableStrSingle);
      Table table = util.createTable(tableName, FAMILIES, splitKeys);

      RegionLocator r = util.getConnection().getRegionLocator(tableName);
      assertEquals("Should start with empty table", 0, util.countRows(table));
      int numRegions = r.getStartKeys().length;
      assertEquals("Should make " + regionNum + " regions", regionNum, numRegions);

      allTables.put(tableStrSingle, table);
      tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r));
    }
    Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad");
    // Generate the bulk load files
    runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer);

    for (Table tableSingle : allTables.values()) {
      // This doesn't write into the table, just makes files
      assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle));
    }
    int numTableDirs = 0;
    for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) {
      Path tablePath = testDir;

      if (writeMultipleTables) {
        if (allTables.containsKey(tf.getPath().getName())) {
          ++numTableDirs;
          tablePath = tf.getPath();
        } else {
          continue;
        }
      }

      // Make sure that a directory was created for every CF
      int dir = 0;
      for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) {
        for (byte[] family : FAMILIES) {
          if (Bytes.toString(family).equals(f.getPath().getName())) {
            ++dir;
          }
        }
      }
      assertEquals("Column family not found in FS.", FAMILIES.length, dir);
    }
    if (writeMultipleTables) {
      assertEquals("Dir for all input tables not created", allTables.size(), numTableDirs);
    }

664
665    Admin admin = util.getConnection().getAdmin();
666    try {
667      // handle the split case
668      if (shouldChangeRegions) {
669        Table chosenTable = allTables.values().iterator().next();
670        // Choose a semi-random table if multiple tables are available
671        LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString());
672        admin.disableTable(chosenTable.getName());
673        util.waitUntilNoRegionsInTransition();
674
675        util.deleteTable(chosenTable.getName());
676        byte[][] newSplitKeys = generateRandomSplitKeys(14);
677        Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys);
678
679        while (util.getConnection().getRegionLocator(chosenTable.getName())
680                .getAllRegionLocations().size() != 15 ||
681                !admin.isTableAvailable(table.getName())) {
682          Thread.sleep(200);
683          LOG.info("Waiting for new region assignment to happen");
684        }
685      }
686
687      // Perform the actual load
688      for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) {
689        Path tableDir = testDir;
690        String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString();
691        LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr);
692        if (writeMultipleTables) {
693          tableDir = new Path(testDir, tableNameStr);
694        }
695        Table currentTable = allTables.get(tableNameStr);
696        TableName currentTableName = currentTable.getName();
697        new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo
698                .getRegionLocator());
699
700        // Ensure data shows up
701        int expectedRows = 0;
702        if (putSortReducer) {
703          // no rows should be extracted
704          assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
705                  util.countRows(currentTable));
706        } else {
707          expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
708          assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows,
709                  util.countRows(currentTable));
710          Scan scan = new Scan();
711          ResultScanner results = currentTable.getScanner(scan);
712          for (Result res : results) {
713            assertEquals(FAMILIES.length, res.rawCells().length);
714            Cell first = res.rawCells()[0];
715            for (Cell kv : res.rawCells()) {
716              assertTrue(CellUtil.matchingRows(first, kv));
717              assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv)));
718            }
719          }
720          results.close();
721        }
722        String tableDigestBefore = util.checksumRows(currentTable);
723        // Check region locality
724        HDFSBlocksDistribution hbd = new HDFSBlocksDistribution();
725        for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) {
726          hbd.add(region.getHDFSBlocksDistribution());
727        }
728        for (String hostname : hostnames) {
729          float locality = hbd.getBlockLocalityIndex(hostname);
730          LOG.info("locality of [" + hostname + "]: " + locality);
731          assertEquals(100, (int) (locality * 100));
732        }
733
734        // Cause regions to reopen
735        admin.disableTable(currentTableName);
736        while (!admin.isTableDisabled(currentTableName)) {
737          Thread.sleep(200);
738          LOG.info("Waiting for table to disable");
739        }
740        admin.enableTable(currentTableName);
741        util.waitTableAvailable(currentTableName);
742        assertEquals("Data should remain after reopening of regions",
743                tableDigestBefore, util.checksumRows(currentTable));
744      }
745    } finally {
746      for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
747          tableInfoSingle.getRegionLocator().close();
748      }
749      for (Entry<String, Table> singleTable : allTables.entrySet() ) {
750        singleTable.getValue().close();
751        util.deleteTable(singleTable.getValue().getName());
752      }
753      testDir.getFileSystem(conf).delete(testDir, true);
754      util.shutdownMiniCluster();
755    }
756  }
757
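  /**
   * Configures and runs the incremental-load MR job for the given tables, asserting that the
   * number of reduce tasks equals the total number of regions.
   */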
  private void runIncrementalPELoad(Configuration conf,
      List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir, boolean putSortReducer)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(conf, "testLocalMRIncrementalLoad");
    job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad"));
    job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
        MutationSerialization.class.getName(), ResultSerialization.class.getName(),
        KeyValueSerialization.class.getName());
    setupRandomGeneratorMapper(job, putSortReducer);
    if (tableInfo.size() > 1) {
      MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo);
      int sum = 0;
      for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) {
        sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size();
      }
      assertEquals(sum, job.getNumReduceTasks());
    } else {
      RegionLocator regionLocator = tableInfo.get(0).getRegionLocator();
      HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(),
          regionLocator);
      assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks());
    }

    FileOutputFormat.setOutputPath(job, outDir);

    assertFalse(util.getTestFileSystem().exists(outDir));

    assertTrue(job.waitForCompletion(true));
  }

  /**
   * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and
   * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}.
   * Tests that the compression map is correctly serialized into
   * and deserialized from configuration.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyCompressionMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Compression.Algorithm> familyToCompression =
          getMockColumnFamiliesForCompression(numCfs);
      Table table = Mockito.mock(Table.class);
      setupMockColumnFamiliesForCompression(table, familyToCompression);
      conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY,
          HFileOutputFormat2.serializeColumnFamilyAttribute(
              HFileOutputFormat2.compressionDetails,
              Arrays.asList(table.getTableDescriptor())));

      // read back family specific compression setting from the configuration
      Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2
          .createFamilyCompressionMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) {
        assertEquals("Compression configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8")));
      }
    }
  }

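  /**
   * Mocks {@link Table#getTableDescriptor()} to return a descriptor containing one column
   * family per entry of the given compression map.
   */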
  private void setupMockColumnFamiliesForCompression(Table table,
      Map<String, Compression.Algorithm> familyToCompression) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
    for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setCompressionType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to compression algorithms for
   *         testing column family compression. Column family names have special characters
   */
  private Map<String, Compression.Algorithm>
      getMockColumnFamiliesForCompression(int numCfs) {
    Map<String, Compression.Algorithm> familyToCompression = new HashMap<>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ);
    }
    if (numCfs-- > 0) {
      familyToCompression.put("Family3", Compression.Algorithm.NONE);
    }
    return familyToCompression;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and
   * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}.
   * Tests that the bloom type map is correctly serialized into
   * and deserialized from configuration.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 2; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, BloomType> familyToBloomType =
          getMockColumnFamiliesForBloomType(numCfs);
      Table table = Mockito.mock(Table.class);
      setupMockColumnFamiliesForBloomType(table, familyToBloomType);
      conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY,
          HFileOutputFormat2.serializeColumnFamilyAttribute(
              HFileOutputFormat2.bloomTypeDetails,
              Arrays.asList(table.getTableDescriptor())));

      // read back family specific bloom type settings from the
      // configuration
      Map<byte[], BloomType> retrievedFamilyToBloomTypeMap =
          HFileOutputFormat2.createFamilyBloomTypeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
        assertEquals("BloomType configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8")));
      }
    }
  }

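  /**
   * Mocks {@link Table#getTableDescriptor()} to return a descriptor containing one column
   * family per entry of the given bloom type map.
   */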
  private void setupMockColumnFamiliesForBloomType(Table table,
      Map<String, BloomType> familyToBloomType) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
    for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBloomFilterType(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to bloom filter types for
   *         testing column family bloom filters. Column family names have special characters
   */
  private Map<String, BloomType>
      getMockColumnFamiliesForBloomType(int numCfs) {
    Map<String, BloomType> familyToBloomType = new HashMap<>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBloomType.put("Family1!@#!@#&", BloomType.ROW);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family2=asdads&!AASD", BloomType.ROWCOL);
    }
    if (numCfs-- > 0) {
      familyToBloomType.put("Family3", BloomType.NONE);
    }
    return familyToBloomType;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and
   * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}.
   * Tests that the block size map is correctly serialized into
   * and deserialized from configuration.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, Integer> familyToBlockSize =
          getMockColumnFamiliesForBlockSize(numCfs);
      Table table = Mockito.mock(Table.class);
      setupMockColumnFamiliesForBlockSize(table, familyToBlockSize);
      conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY,
          HFileOutputFormat2.serializeColumnFamilyAttribute(
              HFileOutputFormat2.blockSizeDetails,
              Arrays.asList(table.getTableDescriptor())));

      // read back family specific block size settings from the
      // configuration
      Map<byte[], Integer> retrievedFamilyToBlockSizeMap =
          HFileOutputFormat2.createFamilyBlockSizeMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
        assertEquals("BlockSize configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8")));
      }
    }
  }

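  /**
   * Mocks {@link Table#getTableDescriptor()} to return a descriptor containing one column
   * family per entry of the given block size map.
   */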
  private void setupMockColumnFamiliesForBlockSize(Table table,
      Map<String, Integer> familyToBlockSize) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
    for (Entry<String, Integer> entry : familyToBlockSize.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setBlocksize(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to block sizes for
   *         testing column family block sizes. Column family names have special characters
   */
  private Map<String, Integer>
      getMockColumnFamiliesForBlockSize(int numCfs) {
    Map<String, Integer> familyToBlockSize = new HashMap<>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family1!@#!@#&", 1234);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family2=asdads&!AASD", Integer.MAX_VALUE);
    }
    if (numCfs-- > 0) {
      familyToBlockSize.put("Family3", 0);
    }
    return familyToBlockSize;
  }

  /**
   * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)}
   * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}.
   * Tests that the data block encoding map is correctly serialized into
   * and deserialized from configuration.
   *
   * @throws IOException
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException {
    for (int numCfs = 0; numCfs <= 3; numCfs++) {
      Configuration conf = new Configuration(this.util.getConfiguration());
      Map<String, DataBlockEncoding> familyToDataBlockEncoding =
          getMockColumnFamiliesForDataBlockEncoding(numCfs);
      Table table = Mockito.mock(Table.class);
      setupMockColumnFamiliesForDataBlockEncoding(table, familyToDataBlockEncoding);
      HTableDescriptor tableDescriptor = table.getTableDescriptor();
      conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY,
          HFileOutputFormat2.serializeColumnFamilyAttribute(
              HFileOutputFormat2.dataBlockEncodingDetails,
              Arrays.asList(tableDescriptor)));

      // read back family specific data block encoding settings from the
      // configuration
      Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap =
          HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);

      // test that we have a value for all column families that matches with the
      // used mock values
      for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
        assertEquals("DataBlockEncoding configuration incorrect for column family:"
            + entry.getKey(), entry.getValue(),
            retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8")));
      }
    }
  }

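  /**
   * Mocks {@link Table#getTableDescriptor()} to return a descriptor containing one column
   * family per entry of the given data block encoding map.
   */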
  private void setupMockColumnFamiliesForDataBlockEncoding(Table table,
      Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException {
    HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]);
    for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) {
      mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey())
          .setMaxVersions(1)
          .setDataBlockEncoding(entry.getValue())
          .setBlockCacheEnabled(false)
          .setTimeToLive(0));
    }
    Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor();
  }

  /**
   * @return a map from column family names to data block encodings for
   *         testing column family data block encoding. Column family names have special characters
   */
  private Map<String, DataBlockEncoding>
      getMockColumnFamiliesForDataBlockEncoding(int numCfs) {
    Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>();
    // use column family names having special characters
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.FAST_DIFF);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family2=asdads&!AASD", DataBlockEncoding.PREFIX);
    }
    if (numCfs-- > 0) {
      familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE);
    }
    return familyToDataBlockEncoding;
  }

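  /**
   * Mocks {@link RegionLocator#getStartKeys()} to return a fixed set of region start keys.
   */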
  private void setupMockStartKeys(RegionLocator table) throws IOException {
    byte[][] mockKeys = new byte[][] {
        HConstants.EMPTY_BYTE_ARRAY,
        Bytes.toBytes("aaa"),
        Bytes.toBytes("ggg"),
        Bytes.toBytes("zzz")
    };
    Mockito.doReturn(mockKeys).when(table).getStartKeys();
  }

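  /**
   * Mocks {@link RegionLocator#getName()} to return a fixed table name.
   */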
  private void setupMockTableName(RegionLocator table) throws IOException {
    TableName mockTableName = TableName.valueOf("mock_table");
    Mockito.doReturn(mockTableName).when(table).getName();
  }

  /**
   * Test that {@link HFileOutputFormat2} RecordWriter uses compression and
   * bloom filter settings from the column family descriptor.
   */
  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
  public void testColumnFamilySettings() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("testColumnFamilySettings");

    // Setup table descriptor
    Table table = Mockito.mock(Table.class);
    RegionLocator regionLocator = Mockito.mock(RegionLocator.class);
    HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]);
    Mockito.doReturn(htd).when(table).getTableDescriptor();
    for (HColumnDescriptor hcd : HBaseTestingUtility.generateColumnDescriptors()) {
      htd.addFamily(hcd);
    }

    // set up the table to return some mock keys
    setupMockStartKeys(regionLocator);

    try {
      // partial map red setup to get an operational writer for testing
      // We turn off the sequence file compression, because DefaultCodec
      // pollutes the GZip codec pool with an incompatible compressor.
      conf.set("io.seqfile.compression.type", "NONE");
      conf.set("hbase.fs.tmp.dir", dir.toString());
      // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs
      conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false);

      Job job = new Job(conf, "testLocalMRIncrementalLoad");
      job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings"));
      setupRandomGeneratorMapper(job, false);
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
      FileOutputFormat.setOutputPath(job, dir);
      context = createTestTaskAttemptContext(job);
      HFileOutputFormat2 hof = new HFileOutputFormat2();
      writer = hof.getRecordWriter(context);

      // write out random rows
      writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT);
      writer.close(context);

      // Make sure that a directory was created for every CF
      FileSystem fs = dir.getFileSystem(conf);

      // commit so that the filesystem has one directory per column family
      hof.getOutputCommitter(context).commitTask(context);
      hof.getOutputCommitter(context).commitJob(context);
      FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs));
      assertEquals(htd.getFamilies().size(), families.length);
      for (FileStatus f : families) {
        String familyStr = f.getPath().getName();
        HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr));
        // verify that the compression on this file matches the configured
        // compression
        Path dataFilePath = fs.listStatus(f.getPath())[0].getPath();
        Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf);
        Map<byte[], byte[]> fileInfo = reader.loadFileInfo();

        byte[] bloomFilter = fileInfo.get(BLOOM_FILTER_TYPE_KEY);
        if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE");
        assertEquals("Incorrect bloom filter used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter)));
        assertEquals("Incorrect compression used for column family " + familyStr +
            "(reader: " + reader + ")",
            hcd.getCompressionType(), reader.getFileContext().getCompression());
      }
    } finally {
      dir.getFileSystem(conf).delete(dir, true);
    }
  }

  /**
   * Write random values to the writer assuming a table created using
   * {@link #FAMILIES} as column family descriptors
   */
  private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context, Set<byte[]> families, int numRows)
      throws IOException, InterruptedException {
    byte[] keyBytes = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte[] valBytes = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {
      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }

1204  /**
1205   * This test is to test the scenario happened in HBASE-6901.
1206   * All files are bulk loaded and excluded from minor compaction.
1207   * Without the fix of HBASE-6901, an ArrayIndexOutOfBoundsException
1208   * will be thrown.
1209   */
1210  @Ignore ("Flakey: See HBASE-9051") @Test
1211  public void testExcludeAllFromMinorCompaction() throws Exception {
1212    Configuration conf = util.getConfiguration();
1213    conf.setInt("hbase.hstore.compaction.min", 2);
1214    generateRandomStartKeys(5);
1215
1216    util.startMiniCluster();
1217    try (Connection conn = ConnectionFactory.createConnection();
1218        Admin admin = conn.getAdmin();
1219        Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
1220        RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) {
1221      final FileSystem fs = util.getDFSCluster().getFileSystem();
1222      assertEquals("Should start with empty table", 0, util.countRows(table));
1223
1224      // deep inspection: get the StoreFile dir
1225      final Path storePath = new Path(
1226        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
1227          new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
1228            Bytes.toString(FAMILIES[0])));
1229      assertEquals(0, fs.listStatus(storePath).length);
1230
1231      // Generate two bulk load files
1232      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
1233          true);
1234
1235      for (int i = 0; i < 2; i++) {
1236        Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i);
        runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(
            table.getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false);
1239        // Perform the actual load
1240        new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator);
1241      }
1242
1243      // Ensure data shows up
1244      int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
1245      assertEquals("LoadIncrementalHFiles should put expected data in table",
1246          expectedRows, util.countRows(table));
1247
1248      // should have a second StoreFile now
1249      assertEquals(2, fs.listStatus(storePath).length);
1250
1251      // minor compactions shouldn't get rid of the file
1252      admin.compact(TABLE_NAMES[0]);
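      // quickPoll asserts that the store file count drops to 1; since both files
      // carry the exclusion marker, the poll is expected to time out with an
      // AssertionError, which the catch below treats as success.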
1253      try {
1254        quickPoll(new Callable<Boolean>() {
1255          @Override
1256          public Boolean call() throws Exception {
1257            List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
1258            for (HRegion region : regions) {
1259              for (HStore store : region.getStores()) {
1260                store.closeAndArchiveCompactedFiles();
1261              }
1262            }
1263            return fs.listStatus(storePath).length == 1;
1264          }
1265        }, 5000);
1266        throw new IOException("SF# = " + fs.listStatus(storePath).length);
1267      } catch (AssertionError ae) {
1268        // this is expected behavior
1269      }
1270
1271      // a major compaction should work though
1272      admin.majorCompact(TABLE_NAMES[0]);
1273      quickPoll(new Callable<Boolean>() {
1274        @Override
1275        public Boolean call() throws Exception {
1276          List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]);
1277          for (HRegion region : regions) {
1278            for (HStore store : region.getStores()) {
1279              store.closeAndArchiveCompactedFiles();
1280            }
1281          }
1282          return fs.listStatus(storePath).length == 1;
1283        }
1284      }, 5000);
1285
1286    } finally {
1287      util.shutdownMiniCluster();
1288    }
1289  }
1290
1291  @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test
1292  public void testExcludeMinorCompaction() throws Exception {
1293    Configuration conf = util.getConfiguration();
1294    conf.setInt("hbase.hstore.compaction.min", 2);
1295    generateRandomStartKeys(5);
1296
1297    util.startMiniCluster();
1298    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin()) {
1300      Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction");
1301      final FileSystem fs = util.getDFSCluster().getFileSystem();
1302      Table table = util.createTable(TABLE_NAMES[0], FAMILIES);
1303      assertEquals("Should start with empty table", 0, util.countRows(table));
1304
1305      // deep inspection: get the StoreFile dir
1306      final Path storePath = new Path(
1307        FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]),
1308          new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(),
1309            Bytes.toString(FAMILIES[0])));
1310      assertEquals(0, fs.listStatus(storePath).length);
1311
1312      // put some data in it and flush to create a storefile
1313      Put p = new Put(Bytes.toBytes("test"));
1314      p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1"));
1315      table.put(p);
1316      admin.flush(TABLE_NAMES[0]);
1317      assertEquals(1, util.countRows(table));
1318      quickPoll(new Callable<Boolean>() {
1319        @Override
1320        public Boolean call() throws Exception {
1321          return fs.listStatus(storePath).length == 1;
1322        }
1323      }, 5000);
1324
1325      // Generate a bulk load file with more rows
1326      conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
1327          true);
1328
1329      RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]);
      runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(
          table.getTableDescriptor(), regionLocator)), testDir, false);
1332
1333      // Perform the actual load
1334      new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator);
1335
1336      // Ensure data shows up
1337      int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT;
1338      assertEquals("LoadIncrementalHFiles should put expected data in table",
1339          expectedRows + 1, util.countRows(table));
1340
1341      // should have a second StoreFile now
1342      assertEquals(2, fs.listStatus(storePath).length);
1343
1344      // minor compactions shouldn't get rid of the file
1345      admin.compact(TABLE_NAMES[0]);
1346      try {
1347        quickPoll(new Callable<Boolean>() {
1348          @Override
1349          public Boolean call() throws Exception {
1350            return fs.listStatus(storePath).length == 1;
1351          }
1352        }, 5000);
1353        throw new IOException("SF# = " + fs.listStatus(storePath).length);
1354      } catch (AssertionError ae) {
1355        // this is expected behavior
1356      }
1357
1358      // a major compaction should work though
1359      admin.majorCompact(TABLE_NAMES[0]);
1360      quickPoll(new Callable<Boolean>() {
1361        @Override
1362        public Boolean call() throws Exception {
1363          return fs.listStatus(storePath).length == 1;
1364        }
1365      }, 5000);
1366
1367    } finally {
1368      util.shutdownMiniCluster();
1369    }
1370  }
1371
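  /**
   * Polls {@code c} every 10 ms until it returns true or roughly {@code waitMs}
   * milliseconds have elapsed, failing the test on timeout.
   */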
1372  private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception {
1373    int sleepMs = 10;
1374    int retries = (int) Math.ceil(((double) waitMs) / sleepMs);
1375    while (retries-- > 0) {
1376      if (c.call().booleanValue()) {
1377        return;
1378      }
1379      Thread.sleep(sleepMs);
1380    }
    fail("Condition not met within " + waitMs + " ms");
1382  }
1383
  public static void main(String[] args) throws Exception {
1385    new TestHFileOutputFormat2().manualTest(args);
1386  }
1387
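  /**
   * Manual driver: pass "newtable" plus a table name to create a pre-split
   * table, or "incremental" plus a table name to run an incremental load
   * against an existing table.
   */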
  public void manualTest(String[] args) throws Exception {
1389    Configuration conf = HBaseConfiguration.create();
1390    util = new HBaseTestingUtility(conf);
1391    if ("newtable".equals(args[0])) {
1392      TableName tname = TableName.valueOf(args[1]);
      byte[][] splitKeys = generateRandomSplitKeys(4);
      util.createTable(tname, FAMILIES, splitKeys);
1395    } else if ("incremental".equals(args[0])) {
1396      TableName tname = TableName.valueOf(args[1]);
1397      try(Connection c = ConnectionFactory.createConnection(conf);
1398          Admin admin = c.getAdmin();
1399          RegionLocator regionLocator = c.getRegionLocator(tname)) {
1400        Path outDir = new Path("incremental-out");
1401        runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin
1402                .getTableDescriptor(tname), regionLocator)), outDir, false);
1403      }
1404    } else {
1405      throw new RuntimeException(
1406          "usage: TestHFileOutputFormat2 newtable | incremental");
1407    }
1408  }
1409
1410  @Test
1411  public void testBlockStoragePolicy() throws Exception {
1412    util = new HBaseTestingUtility();
1413    Configuration conf = util.getConfiguration();
1414    conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD");
1415
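    // Override the table-wide ALL_SSD policy with ONE_SSD for the first column
    // family; the second family should fall back to the table-wide setting.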
1416    conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX +
1417            Bytes.toString(HFileOutputFormat2.combineTableNameSuffix(
1418                    TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD");
1419    Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0]));
1420    Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1]));
1421    util.startMiniDFSCluster(3);
1422    FileSystem fs = util.getDFSCluster().getFileSystem();
1423    try {
1424      fs.mkdirs(cf1Dir);
1425      fs.mkdirs(cf2Dir);
1426
      // newly created directories get the default block storage policy, HOT
1428      String spA = getStoragePolicyName(fs, cf1Dir);
1429      String spB = getStoragePolicyName(fs, cf2Dir);
1430      LOG.debug("Storage policy of cf 0: [" + spA + "].");
1431      LOG.debug("Storage policy of cf 1: [" + spB + "].");
1432      assertEquals("HOT", spA);
1433      assertEquals("HOT", spB);
1434
1435      // alter table cf schema to change storage policies
1436      HFileOutputFormat2.configureStoragePolicy(conf, fs,
1437              HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir);
1438      HFileOutputFormat2.configureStoragePolicy(conf, fs,
1439              HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir);
1440      spA = getStoragePolicyName(fs, cf1Dir);
1441      spB = getStoragePolicyName(fs, cf2Dir);
1442      LOG.debug("Storage policy of cf 0: [" + spA + "].");
1443      LOG.debug("Storage policy of cf 1: [" + spB + "].");
1444      assertNotNull(spA);
1445      assertEquals("ONE_SSD", spA);
1446      assertNotNull(spB);
1447      assertEquals("ALL_SSD", spB);
1448    } finally {
1449      fs.delete(cf1Dir, true);
1450      fs.delete(cf2Dir, true);
1451      util.shutdownMiniDFSCluster();
1452    }
1453  }
1454
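  /**
   * Resolves the storage policy name of {@code path} reflectively, falling back
   * to the older HDFS client API (and ultimately to "HOT") when that fails.
   */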
1455  private String getStoragePolicyName(FileSystem fs, Path path) {
1456    try {
1457      Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path);
1458      return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName");
1459    } catch (Exception e) {
      // This may fail on older HDFS versions; fall back to the old way
1461      if (LOG.isTraceEnabled()) {
1462        LOG.trace("Failed to get policy directly", e);
1463      }
1464      String policy = getStoragePolicyNameForOldHDFSVersion(fs, path);
      return policy == null ? "HOT" : policy; // HOT by default
1466    }
1467  }
1468
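  /**
   * Storage policy lookup for older HDFS versions: matches the file's policy id
   * against the policies the cluster advertises; returns null if undetermined.
   */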
1469  private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) {
1470    try {
1471      if (fs instanceof DistributedFileSystem) {
1472        DistributedFileSystem dfs = (DistributedFileSystem) fs;
1473        HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath());
1474        if (null != status) {
1475          byte storagePolicyId = status.getStoragePolicy();
1476          Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED");
1477          if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) {
1478            BlockStoragePolicy[] policies = dfs.getStoragePolicies();
1479            for (BlockStoragePolicy policy : policies) {
1480              if (policy.getId() == storagePolicyId) {
1481                return policy.getName();
1482              }
1483            }
1484          }
1485        }
1486      }
1487    } catch (Throwable e) {
1488      LOG.warn("failed to get block storage policy of [" + path + "]", e);
1489    }
1490
1491    return null;
1492  }
1493}
1494