001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertFalse; 023import static org.junit.Assert.assertNotNull; 024import static org.junit.Assert.assertNotSame; 025import static org.junit.Assert.assertTrue; 026import static org.junit.Assert.fail; 027 028import java.io.IOException; 029import java.lang.reflect.Field; 030import java.util.ArrayList; 031import java.util.Arrays; 032import java.util.HashMap; 033import java.util.List; 034import java.util.Map; 035import java.util.Map.Entry; 036import java.util.Random; 037import java.util.Set; 038import java.util.concurrent.Callable; 039import java.util.stream.Collectors; 040import java.util.stream.Stream; 041import org.apache.hadoop.conf.Configuration; 042import org.apache.hadoop.fs.FileStatus; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.LocatedFileStatus; 045import org.apache.hadoop.fs.Path; 046import org.apache.hadoop.fs.RemoteIterator; 047import org.apache.hadoop.hbase.ArrayBackedTag; 048import org.apache.hadoop.hbase.Cell; 049import org.apache.hadoop.hbase.CellUtil; 050import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 051import org.apache.hadoop.hbase.HBaseClassTestRule; 052import org.apache.hadoop.hbase.HBaseConfiguration; 053import org.apache.hadoop.hbase.HBaseTestingUtility; 054import org.apache.hadoop.hbase.HColumnDescriptor; 055import org.apache.hadoop.hbase.HConstants; 056import org.apache.hadoop.hbase.HDFSBlocksDistribution; 057import org.apache.hadoop.hbase.HTableDescriptor; 058import org.apache.hadoop.hbase.HadoopShims; 059import org.apache.hadoop.hbase.KeyValue; 060import org.apache.hadoop.hbase.PerformanceEvaluation; 061import org.apache.hadoop.hbase.PrivateCellUtil; 062import org.apache.hadoop.hbase.TableName; 063import org.apache.hadoop.hbase.Tag; 064import org.apache.hadoop.hbase.TagType; 065import org.apache.hadoop.hbase.client.Admin; 066import org.apache.hadoop.hbase.client.Connection; 067import org.apache.hadoop.hbase.client.ConnectionFactory; 068import org.apache.hadoop.hbase.client.Put; 069import org.apache.hadoop.hbase.client.RegionLocator; 070import org.apache.hadoop.hbase.client.Result; 071import org.apache.hadoop.hbase.client.ResultScanner; 072import org.apache.hadoop.hbase.client.Scan; 073import org.apache.hadoop.hbase.client.Table; 074import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 075import org.apache.hadoop.hbase.io.compress.Compression; 076import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; 077import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 078import org.apache.hadoop.hbase.io.hfile.CacheConfig; 079import org.apache.hadoop.hbase.io.hfile.HFile; 080import org.apache.hadoop.hbase.io.hfile.HFile.Reader; 081import org.apache.hadoop.hbase.io.hfile.HFileScanner; 082import org.apache.hadoop.hbase.regionserver.BloomType; 083import org.apache.hadoop.hbase.regionserver.HRegion; 084import org.apache.hadoop.hbase.regionserver.HStore; 085import org.apache.hadoop.hbase.regionserver.TestHRegionFileSystem; 086import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; 087import org.apache.hadoop.hbase.testclassification.LargeTests; 088import org.apache.hadoop.hbase.testclassification.VerySlowMapReduceTests; 089import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles; 090import org.apache.hadoop.hbase.util.Bytes; 091import org.apache.hadoop.hbase.util.FSUtils; 092import org.apache.hadoop.hbase.util.ReflectionUtils; 093import org.apache.hadoop.hdfs.DistributedFileSystem; 094import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; 095import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 096import org.apache.hadoop.hdfs.server.blockmanagement.BlockStoragePolicySuite; 097import org.apache.hadoop.io.NullWritable; 098import org.apache.hadoop.mapreduce.Job; 099import org.apache.hadoop.mapreduce.Mapper; 100import org.apache.hadoop.mapreduce.RecordWriter; 101import org.apache.hadoop.mapreduce.TaskAttemptContext; 102import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 103import org.junit.ClassRule; 104import org.junit.Ignore; 105import org.junit.Test; 106import org.junit.experimental.categories.Category; 107import org.mockito.Mockito; 108import org.slf4j.Logger; 109import org.slf4j.LoggerFactory; 110 111/** 112 * Simple test for {@link HFileOutputFormat2}. 113 * Sets up and runs a mapreduce job that writes hfile output. 114 * Creates a few inner classes to implement splits and an inputformat that 115 * emits keys and values like those of {@link PerformanceEvaluation}. 116 */ 117@Category({VerySlowMapReduceTests.class, LargeTests.class}) 118//TODO : Remove this in 3.0 119public class TestHFileOutputFormat2 { 120 121 @ClassRule 122 public static final HBaseClassTestRule CLASS_RULE = 123 HBaseClassTestRule.forClass(TestHFileOutputFormat2.class); 124 125 private final static int ROWSPERSPLIT = 1024; 126 127 public static final byte[] FAMILY_NAME = TestHRegionFileSystem.FAMILY_NAME; 128 private static final byte[][] FAMILIES = { 129 Bytes.add(FAMILY_NAME, Bytes.toBytes("-A")), Bytes.add(FAMILY_NAME, Bytes.toBytes("-B"))}; 130 private static final TableName[] TABLE_NAMES = Stream.of("TestTable", "TestTable2", 131 "TestTable3").map(TableName::valueOf).toArray(TableName[]::new); 132 133 private HBaseTestingUtility util = new HBaseTestingUtility(); 134 135 private static final Logger LOG = LoggerFactory.getLogger(TestHFileOutputFormat2.class); 136 137 /** 138 * Simple mapper that makes KeyValue output. 139 */ 140 static class RandomKVGeneratingMapper 141 extends Mapper<NullWritable, NullWritable, 142 ImmutableBytesWritable, Cell> { 143 144 private int keyLength; 145 private static final int KEYLEN_DEFAULT=10; 146 private static final String KEYLEN_CONF="randomkv.key.length"; 147 148 private int valLength; 149 private static final int VALLEN_DEFAULT=10; 150 private static final String VALLEN_CONF="randomkv.val.length"; 151 private static final byte [] QUALIFIER = Bytes.toBytes("data"); 152 private boolean multiTableMapper = false; 153 private TableName[] tables = null; 154 155 156 @Override 157 protected void setup(Context context) throws IOException, 158 InterruptedException { 159 super.setup(context); 160 161 Configuration conf = context.getConfiguration(); 162 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT); 163 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT); 164 multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, 165 false); 166 if (multiTableMapper) { 167 tables = TABLE_NAMES; 168 } else { 169 tables = new TableName[]{TABLE_NAMES[0]}; 170 } 171 } 172 173 @Override 174 protected void map( 175 NullWritable n1, NullWritable n2, 176 Mapper<NullWritable, NullWritable, 177 ImmutableBytesWritable,Cell>.Context context) 178 throws java.io.IOException ,InterruptedException 179 { 180 181 byte keyBytes[] = new byte[keyLength]; 182 byte valBytes[] = new byte[valLength]; 183 184 int taskId = context.getTaskAttemptID().getTaskID().getId(); 185 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!"; 186 Random random = new Random(); 187 byte[] key; 188 for (int j = 0; j < tables.length; ++j) { 189 for (int i = 0; i < ROWSPERSPLIT; i++) { 190 random.nextBytes(keyBytes); 191 // Ensure that unique tasks generate unique keys 192 keyBytes[keyLength - 1] = (byte) (taskId & 0xFF); 193 random.nextBytes(valBytes); 194 key = keyBytes; 195 if (multiTableMapper) { 196 key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes); 197 } 198 199 for (byte[] family : TestHFileOutputFormat2.FAMILIES) { 200 Cell kv = new KeyValue(keyBytes, family, QUALIFIER, valBytes); 201 context.write(new ImmutableBytesWritable(key), kv); 202 } 203 } 204 } 205 } 206 } 207 208 /** 209 * Simple mapper that makes Put output. 210 */ 211 static class RandomPutGeneratingMapper 212 extends Mapper<NullWritable, NullWritable, 213 ImmutableBytesWritable, Put> { 214 215 private int keyLength; 216 private static final int KEYLEN_DEFAULT = 10; 217 private static final String KEYLEN_CONF = "randomkv.key.length"; 218 219 private int valLength; 220 private static final int VALLEN_DEFAULT = 10; 221 private static final String VALLEN_CONF = "randomkv.val.length"; 222 private static final byte[] QUALIFIER = Bytes.toBytes("data"); 223 private boolean multiTableMapper = false; 224 private TableName[] tables = null; 225 226 @Override 227 protected void setup(Context context) throws IOException, 228 InterruptedException { 229 super.setup(context); 230 231 Configuration conf = context.getConfiguration(); 232 keyLength = conf.getInt(KEYLEN_CONF, KEYLEN_DEFAULT); 233 valLength = conf.getInt(VALLEN_CONF, VALLEN_DEFAULT); 234 multiTableMapper = conf.getBoolean(HFileOutputFormat2.MULTI_TABLE_HFILEOUTPUTFORMAT_CONF_KEY, 235 false); 236 if (multiTableMapper) { 237 tables = TABLE_NAMES; 238 } else { 239 tables = new TableName[]{TABLE_NAMES[0]}; 240 } 241 } 242 243 @Override 244 protected void map( 245 NullWritable n1, NullWritable n2, 246 Mapper<NullWritable, NullWritable, 247 ImmutableBytesWritable, Put>.Context context) 248 throws java.io.IOException, InterruptedException { 249 250 byte keyBytes[] = new byte[keyLength]; 251 byte valBytes[] = new byte[valLength]; 252 253 int taskId = context.getTaskAttemptID().getTaskID().getId(); 254 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!"; 255 256 Random random = new Random(); 257 byte[] key; 258 for (int j = 0; j < tables.length; ++j) { 259 for (int i = 0; i < ROWSPERSPLIT; i++) { 260 random.nextBytes(keyBytes); 261 // Ensure that unique tasks generate unique keys 262 keyBytes[keyLength - 1] = (byte) (taskId & 0xFF); 263 random.nextBytes(valBytes); 264 key = keyBytes; 265 if (multiTableMapper) { 266 key = MultiTableHFileOutputFormat.createCompositeKey(tables[j].getName(), keyBytes); 267 } 268 269 for (byte[] family : TestHFileOutputFormat2.FAMILIES) { 270 Put p = new Put(keyBytes); 271 p.addColumn(family, QUALIFIER, valBytes); 272 // set TTL to very low so that the scan does not return any value 273 p.setTTL(1l); 274 context.write(new ImmutableBytesWritable(key), p); 275 } 276 } 277 } 278 } 279 } 280 281 private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) { 282 if (putSortReducer) { 283 job.setInputFormatClass(NMapInputFormat.class); 284 job.setMapperClass(RandomPutGeneratingMapper.class); 285 job.setMapOutputKeyClass(ImmutableBytesWritable.class); 286 job.setMapOutputValueClass(Put.class); 287 } else { 288 job.setInputFormatClass(NMapInputFormat.class); 289 job.setMapperClass(RandomKVGeneratingMapper.class); 290 job.setMapOutputKeyClass(ImmutableBytesWritable.class); 291 job.setMapOutputValueClass(KeyValue.class); 292 } 293 } 294 295 /** 296 * Test that {@link HFileOutputFormat2} RecordWriter amends timestamps if 297 * passed a keyvalue whose timestamp is {@link HConstants#LATEST_TIMESTAMP}. 298 * @see <a href="https://issues.apache.org/jira/browse/HBASE-2615">HBASE-2615</a> 299 */ 300 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 301 public void test_LATEST_TIMESTAMP_isReplaced() 302 throws Exception { 303 Configuration conf = new Configuration(this.util.getConfiguration()); 304 RecordWriter<ImmutableBytesWritable, Cell> writer = null; 305 TaskAttemptContext context = null; 306 Path dir = 307 util.getDataTestDir("test_LATEST_TIMESTAMP_isReplaced"); 308 try { 309 Job job = new Job(conf); 310 FileOutputFormat.setOutputPath(job, dir); 311 context = createTestTaskAttemptContext(job); 312 HFileOutputFormat2 hof = new HFileOutputFormat2(); 313 writer = hof.getRecordWriter(context); 314 final byte [] b = Bytes.toBytes("b"); 315 316 // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be 317 // changed by call to write. Check all in kv is same but ts. 318 KeyValue kv = new KeyValue(b, b, b); 319 KeyValue original = kv.clone(); 320 writer.write(new ImmutableBytesWritable(), kv); 321 assertFalse(original.equals(kv)); 322 assertTrue(Bytes.equals(CellUtil.cloneRow(original), CellUtil.cloneRow(kv))); 323 assertTrue(Bytes.equals(CellUtil.cloneFamily(original), CellUtil.cloneFamily(kv))); 324 assertTrue(Bytes.equals(CellUtil.cloneQualifier(original), CellUtil.cloneQualifier(kv))); 325 assertNotSame(original.getTimestamp(), kv.getTimestamp()); 326 assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp()); 327 328 // Test 2. Now test passing a kv that has explicit ts. It should not be 329 // changed by call to record write. 330 kv = new KeyValue(b, b, b, kv.getTimestamp() - 1, b); 331 original = kv.clone(); 332 writer.write(new ImmutableBytesWritable(), kv); 333 assertTrue(original.equals(kv)); 334 } finally { 335 if (writer != null && context != null) writer.close(context); 336 dir.getFileSystem(conf).delete(dir, true); 337 } 338 } 339 340 private TaskAttemptContext createTestTaskAttemptContext(final Job job) 341 throws Exception { 342 HadoopShims hadoop = CompatibilitySingletonFactory.getInstance(HadoopShims.class); 343 TaskAttemptContext context = hadoop.createTestTaskAttemptContext( 344 job, "attempt_201402131733_0001_m_000000_0"); 345 return context; 346 } 347 348 /* 349 * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE 350 * metadata used by time-restricted scans. 351 */ 352 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 353 public void test_TIMERANGE() throws Exception { 354 Configuration conf = new Configuration(this.util.getConfiguration()); 355 RecordWriter<ImmutableBytesWritable, Cell> writer = null; 356 TaskAttemptContext context = null; 357 Path dir = 358 util.getDataTestDir("test_TIMERANGE_present"); 359 LOG.info("Timerange dir writing to dir: "+ dir); 360 try { 361 // build a record writer using HFileOutputFormat2 362 Job job = new Job(conf); 363 FileOutputFormat.setOutputPath(job, dir); 364 context = createTestTaskAttemptContext(job); 365 HFileOutputFormat2 hof = new HFileOutputFormat2(); 366 writer = hof.getRecordWriter(context); 367 368 // Pass two key values with explicit times stamps 369 final byte [] b = Bytes.toBytes("b"); 370 371 // value 1 with timestamp 2000 372 KeyValue kv = new KeyValue(b, b, b, 2000, b); 373 KeyValue original = kv.clone(); 374 writer.write(new ImmutableBytesWritable(), kv); 375 assertEquals(original,kv); 376 377 // value 2 with timestamp 1000 378 kv = new KeyValue(b, b, b, 1000, b); 379 original = kv.clone(); 380 writer.write(new ImmutableBytesWritable(), kv); 381 assertEquals(original, kv); 382 383 // verify that the file has the proper FileInfo. 384 writer.close(context); 385 386 // the generated file lives 1 directory down from the attempt directory 387 // and is the only file, e.g. 388 // _attempt__0000_r_000000_0/b/1979617994050536795 389 FileSystem fs = FileSystem.get(conf); 390 Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent(); 391 FileStatus[] sub1 = fs.listStatus(attemptDirectory); 392 FileStatus[] file = fs.listStatus(sub1[0].getPath()); 393 394 // open as HFile Reader and pull out TIMERANGE FileInfo. 395 HFile.Reader rd = 396 HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), true, conf); 397 Map<byte[],byte[]> finfo = rd.loadFileInfo(); 398 byte[] range = finfo.get("TIMERANGE".getBytes("UTF-8")); 399 assertNotNull(range); 400 401 // unmarshall and check values. 402 TimeRangeTracker timeRangeTracker =TimeRangeTracker.parseFrom(range); 403 LOG.info(timeRangeTracker.getMin() + 404 "...." + timeRangeTracker.getMax()); 405 assertEquals(1000, timeRangeTracker.getMin()); 406 assertEquals(2000, timeRangeTracker.getMax()); 407 rd.close(); 408 } finally { 409 if (writer != null && context != null) writer.close(context); 410 dir.getFileSystem(conf).delete(dir, true); 411 } 412 } 413 414 /** 415 * Run small MR job. 416 */ 417 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 418 public void testWritingPEData() throws Exception { 419 Configuration conf = util.getConfiguration(); 420 Path testDir = util.getDataTestDirOnTestFS("testWritingPEData"); 421 FileSystem fs = testDir.getFileSystem(conf); 422 423 // Set down this value or we OOME in eclipse. 424 conf.setInt("mapreduce.task.io.sort.mb", 20); 425 // Write a few files. 426 conf.setLong(HConstants.HREGION_MAX_FILESIZE, 64 * 1024); 427 428 Job job = new Job(conf, "testWritingPEData"); 429 setupRandomGeneratorMapper(job, false); 430 // This partitioner doesn't work well for number keys but using it anyways 431 // just to demonstrate how to configure it. 432 byte[] startKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT]; 433 byte[] endKey = new byte[RandomKVGeneratingMapper.KEYLEN_DEFAULT]; 434 435 Arrays.fill(startKey, (byte)0); 436 Arrays.fill(endKey, (byte)0xff); 437 438 job.setPartitionerClass(SimpleTotalOrderPartitioner.class); 439 // Set start and end rows for partitioner. 440 SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey); 441 SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey); 442 job.setReducerClass(KeyValueSortReducer.class); 443 job.setOutputFormatClass(HFileOutputFormat2.class); 444 job.setNumReduceTasks(4); 445 job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"), 446 MutationSerialization.class.getName(), ResultSerialization.class.getName(), 447 KeyValueSerialization.class.getName()); 448 449 FileOutputFormat.setOutputPath(job, testDir); 450 assertTrue(job.waitForCompletion(false)); 451 FileStatus [] files = fs.listStatus(testDir); 452 assertTrue(files.length > 0); 453 } 454 455 /** 456 * Test that {@link HFileOutputFormat2} RecordWriter writes tags such as ttl into 457 * hfile. 458 */ 459 @Test 460 public void test_WritingTagData() 461 throws Exception { 462 Configuration conf = new Configuration(this.util.getConfiguration()); 463 final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version"; 464 conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS); 465 RecordWriter<ImmutableBytesWritable, Cell> writer = null; 466 TaskAttemptContext context = null; 467 Path dir = 468 util.getDataTestDir("WritingTagData"); 469 try { 470 conf.set(HFileOutputFormat2.OUTPUT_TABLE_NAME_CONF_KEY, TABLE_NAMES[0].getNameAsString()); 471 // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs 472 conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false); 473 Job job = new Job(conf); 474 FileOutputFormat.setOutputPath(job, dir); 475 context = createTestTaskAttemptContext(job); 476 HFileOutputFormat2 hof = new HFileOutputFormat2(); 477 writer = hof.getRecordWriter(context); 478 final byte [] b = Bytes.toBytes("b"); 479 480 List< Tag > tags = new ArrayList<>(); 481 tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670))); 482 KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags); 483 writer.write(new ImmutableBytesWritable(), kv); 484 writer.close(context); 485 writer = null; 486 FileSystem fs = dir.getFileSystem(conf); 487 RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true); 488 while(iterator.hasNext()) { 489 LocatedFileStatus keyFileStatus = iterator.next(); 490 HFile.Reader reader = 491 HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), true, conf); 492 HFileScanner scanner = reader.getScanner(false, false, false); 493 scanner.seekTo(); 494 Cell cell = scanner.getCell(); 495 List<Tag> tagsFromCell = PrivateCellUtil.getTags(cell); 496 assertTrue(tagsFromCell.size() > 0); 497 for (Tag tag : tagsFromCell) { 498 assertTrue(tag.getType() == TagType.TTL_TAG_TYPE); 499 } 500 } 501 } finally { 502 if (writer != null && context != null) writer.close(context); 503 dir.getFileSystem(conf).delete(dir, true); 504 } 505 } 506 507 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 508 public void testJobConfiguration() throws Exception { 509 Configuration conf = new Configuration(this.util.getConfiguration()); 510 conf.set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, util.getDataTestDir("testJobConfiguration") 511 .toString()); 512 Job job = new Job(conf); 513 job.setWorkingDirectory(util.getDataTestDir("testJobConfiguration")); 514 Table table = Mockito.mock(Table.class); 515 RegionLocator regionLocator = Mockito.mock(RegionLocator.class); 516 setupMockStartKeys(regionLocator); 517 setupMockTableName(regionLocator); 518 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); 519 assertEquals(job.getNumReduceTasks(), 4); 520 } 521 522 private byte [][] generateRandomStartKeys(int numKeys) { 523 Random random = new Random(); 524 byte[][] ret = new byte[numKeys][]; 525 // first region start key is always empty 526 ret[0] = HConstants.EMPTY_BYTE_ARRAY; 527 for (int i = 1; i < numKeys; i++) { 528 ret[i] = 529 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH); 530 } 531 return ret; 532 } 533 534 private byte[][] generateRandomSplitKeys(int numKeys) { 535 Random random = new Random(); 536 byte[][] ret = new byte[numKeys][]; 537 for (int i = 0; i < numKeys; i++) { 538 ret[i] = 539 PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH); 540 } 541 return ret; 542 } 543 544 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 545 public void testMRIncrementalLoad() throws Exception { 546 LOG.info("\nStarting test testMRIncrementalLoad\n"); 547 doIncrementalLoadTest(false, false, false, "testMRIncrementalLoad"); 548 } 549 550 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 551 public void testMRIncrementalLoadWithSplit() throws Exception { 552 LOG.info("\nStarting test testMRIncrementalLoadWithSplit\n"); 553 doIncrementalLoadTest(true, false, false, "testMRIncrementalLoadWithSplit"); 554 } 555 556 /** 557 * Test for HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY = true 558 * This test could only check the correctness of original logic if LOCALITY_SENSITIVE_CONF_KEY 559 * is set to true. Because MiniHBaseCluster always run with single hostname (and different ports), 560 * it's not possible to check the region locality by comparing region locations and DN hostnames. 561 * When MiniHBaseCluster supports explicit hostnames parameter (just like MiniDFSCluster does), 562 * we could test region locality features more easily. 563 */ 564 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 565 public void testMRIncrementalLoadWithLocality() throws Exception { 566 LOG.info("\nStarting test testMRIncrementalLoadWithLocality\n"); 567 doIncrementalLoadTest(false, true, false, "testMRIncrementalLoadWithLocality1"); 568 doIncrementalLoadTest(true, true, false, "testMRIncrementalLoadWithLocality2"); 569 } 570 571 //@Ignore("Wahtevs") 572 @Test 573 public void testMRIncrementalLoadWithPutSortReducer() throws Exception { 574 LOG.info("\nStarting test testMRIncrementalLoadWithPutSortReducer\n"); 575 doIncrementalLoadTest(false, false, true, "testMRIncrementalLoadWithPutSortReducer"); 576 } 577 578 private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality, 579 boolean putSortReducer, String tableStr) throws Exception { 580 doIncrementalLoadTest(shouldChangeRegions, shouldKeepLocality, putSortReducer, 581 Arrays.asList(tableStr)); 582 } 583 584 @Test 585 public void testMultiMRIncrementalLoadWithPutSortReducer() throws Exception { 586 LOG.info("\nStarting test testMultiMRIncrementalLoadWithPutSortReducer\n"); 587 doIncrementalLoadTest(false, false, true, 588 Arrays.stream(TABLE_NAMES).map(TableName::getNameAsString).collect(Collectors.toList 589 ())); 590 } 591 592 private void doIncrementalLoadTest(boolean shouldChangeRegions, boolean shouldKeepLocality, 593 boolean putSortReducer, List<String> tableStr) throws Exception { 594 util = new HBaseTestingUtility(); 595 Configuration conf = util.getConfiguration(); 596 conf.setBoolean(MultiTableHFileOutputFormat.LOCALITY_SENSITIVE_CONF_KEY, shouldKeepLocality); 597 int hostCount = 1; 598 int regionNum = 5; 599 if (shouldKeepLocality) { 600 // We should change host count higher than hdfs replica count when MiniHBaseCluster supports 601 // explicit hostnames parameter just like MiniDFSCluster does. 602 hostCount = 3; 603 regionNum = 20; 604 } 605 606 String[] hostnames = new String[hostCount]; 607 for (int i = 0; i < hostCount; ++i) { 608 hostnames[i] = "datanode_" + i; 609 } 610 util.startMiniCluster(1, hostCount, hostnames); 611 612 Map<String, Table> allTables = new HashMap<>(tableStr.size()); 613 List<HFileOutputFormat2.TableInfo> tableInfo = new ArrayList<>(tableStr.size()); 614 boolean writeMultipleTables = tableStr.size() > 1; 615 for (String tableStrSingle : tableStr) { 616 byte[][] splitKeys = generateRandomSplitKeys(regionNum - 1); 617 TableName tableName = TableName.valueOf(tableStrSingle); 618 Table table = util.createTable(tableName, FAMILIES, splitKeys); 619 620 RegionLocator r = util.getConnection().getRegionLocator(tableName); 621 assertEquals("Should start with empty table", 0, util.countRows(table)); 622 int numRegions = r.getStartKeys().length; 623 assertEquals("Should make " + regionNum + " regions", numRegions, regionNum); 624 625 allTables.put(tableStrSingle, table); 626 tableInfo.add(new HFileOutputFormat2.TableInfo(table.getTableDescriptor(), r)); 627 } 628 Path testDir = util.getDataTestDirOnTestFS("testLocalMRIncrementalLoad"); 629 // Generate the bulk load files 630 runIncrementalPELoad(conf, tableInfo, testDir, putSortReducer); 631 632 for (Table tableSingle : allTables.values()) { 633 // This doesn't write into the table, just makes files 634 assertEquals("HFOF should not touch actual table", 0, util.countRows(tableSingle)); 635 } 636 int numTableDirs = 0; 637 for (FileStatus tf : testDir.getFileSystem(conf).listStatus(testDir)) { 638 Path tablePath = testDir; 639 640 if (writeMultipleTables) { 641 if (allTables.containsKey(tf.getPath().getName())) { 642 ++numTableDirs; 643 tablePath = tf.getPath(); 644 } 645 else { 646 continue; 647 } 648 } 649 650 // Make sure that a directory was created for every CF 651 int dir = 0; 652 for (FileStatus f : tablePath.getFileSystem(conf).listStatus(tablePath)) { 653 for (byte[] family : FAMILIES) { 654 if (Bytes.toString(family).equals(f.getPath().getName())) { 655 ++dir; 656 } 657 } 658 } 659 assertEquals("Column family not found in FS.", FAMILIES.length, dir); 660 } 661 if (writeMultipleTables) { 662 assertEquals("Dir for all input tables not created", numTableDirs, allTables.size()); 663 } 664 665 Admin admin = util.getConnection().getAdmin(); 666 try { 667 // handle the split case 668 if (shouldChangeRegions) { 669 Table chosenTable = allTables.values().iterator().next(); 670 // Choose a semi-random table if multiple tables are available 671 LOG.info("Changing regions in table " + chosenTable.getName().getNameAsString()); 672 admin.disableTable(chosenTable.getName()); 673 util.waitUntilNoRegionsInTransition(); 674 675 util.deleteTable(chosenTable.getName()); 676 byte[][] newSplitKeys = generateRandomSplitKeys(14); 677 Table table = util.createTable(chosenTable.getName(), FAMILIES, newSplitKeys); 678 679 while (util.getConnection().getRegionLocator(chosenTable.getName()) 680 .getAllRegionLocations().size() != 15 || 681 !admin.isTableAvailable(table.getName())) { 682 Thread.sleep(200); 683 LOG.info("Waiting for new region assignment to happen"); 684 } 685 } 686 687 // Perform the actual load 688 for (HFileOutputFormat2.TableInfo singleTableInfo : tableInfo) { 689 Path tableDir = testDir; 690 String tableNameStr = singleTableInfo.getHTableDescriptor().getNameAsString(); 691 LOG.info("Running LoadIncrementalHFiles on table" + tableNameStr); 692 if (writeMultipleTables) { 693 tableDir = new Path(testDir, tableNameStr); 694 } 695 Table currentTable = allTables.get(tableNameStr); 696 TableName currentTableName = currentTable.getName(); 697 new LoadIncrementalHFiles(conf).doBulkLoad(tableDir, admin, currentTable, singleTableInfo 698 .getRegionLocator()); 699 700 // Ensure data shows up 701 int expectedRows = 0; 702 if (putSortReducer) { 703 // no rows should be extracted 704 assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, 705 util.countRows(currentTable)); 706 } else { 707 expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; 708 assertEquals("LoadIncrementalHFiles should put expected data in table", expectedRows, 709 util.countRows(currentTable)); 710 Scan scan = new Scan(); 711 ResultScanner results = currentTable.getScanner(scan); 712 for (Result res : results) { 713 assertEquals(FAMILIES.length, res.rawCells().length); 714 Cell first = res.rawCells()[0]; 715 for (Cell kv : res.rawCells()) { 716 assertTrue(CellUtil.matchingRows(first, kv)); 717 assertTrue(Bytes.equals(CellUtil.cloneValue(first), CellUtil.cloneValue(kv))); 718 } 719 } 720 results.close(); 721 } 722 String tableDigestBefore = util.checksumRows(currentTable); 723 // Check region locality 724 HDFSBlocksDistribution hbd = new HDFSBlocksDistribution(); 725 for (HRegion region : util.getHBaseCluster().getRegions(currentTableName)) { 726 hbd.add(region.getHDFSBlocksDistribution()); 727 } 728 for (String hostname : hostnames) { 729 float locality = hbd.getBlockLocalityIndex(hostname); 730 LOG.info("locality of [" + hostname + "]: " + locality); 731 assertEquals(100, (int) (locality * 100)); 732 } 733 734 // Cause regions to reopen 735 admin.disableTable(currentTableName); 736 while (!admin.isTableDisabled(currentTableName)) { 737 Thread.sleep(200); 738 LOG.info("Waiting for table to disable"); 739 } 740 admin.enableTable(currentTableName); 741 util.waitTableAvailable(currentTableName); 742 assertEquals("Data should remain after reopening of regions", 743 tableDigestBefore, util.checksumRows(currentTable)); 744 } 745 } finally { 746 for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) { 747 tableInfoSingle.getRegionLocator().close(); 748 } 749 for (Entry<String, Table> singleTable : allTables.entrySet() ) { 750 singleTable.getValue().close(); 751 util.deleteTable(singleTable.getValue().getName()); 752 } 753 testDir.getFileSystem(conf).delete(testDir, true); 754 util.shutdownMiniCluster(); 755 } 756 } 757 758 private void runIncrementalPELoad(Configuration conf, List<HFileOutputFormat2.TableInfo> tableInfo, Path outDir, 759 boolean putSortReducer) throws IOException, 760 InterruptedException, ClassNotFoundException { 761 Job job = new Job(conf, "testLocalMRIncrementalLoad"); 762 job.setWorkingDirectory(util.getDataTestDirOnTestFS("runIncrementalPELoad")); 763 job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"), 764 MutationSerialization.class.getName(), ResultSerialization.class.getName(), 765 KeyValueSerialization.class.getName()); 766 setupRandomGeneratorMapper(job, putSortReducer); 767 if (tableInfo.size() > 1) { 768 MultiTableHFileOutputFormat.configureIncrementalLoad(job, tableInfo); 769 int sum = 0; 770 for (HFileOutputFormat2.TableInfo tableInfoSingle : tableInfo) { 771 sum += tableInfoSingle.getRegionLocator().getAllRegionLocations().size(); 772 } 773 assertEquals(sum, job.getNumReduceTasks()); 774 } 775 else { 776 RegionLocator regionLocator = tableInfo.get(0).getRegionLocator(); 777 HFileOutputFormat2.configureIncrementalLoad(job, tableInfo.get(0).getHTableDescriptor(), 778 regionLocator); 779 assertEquals(regionLocator.getAllRegionLocations().size(), job.getNumReduceTasks()); 780 } 781 782 FileOutputFormat.setOutputPath(job, outDir); 783 784 assertFalse(util.getTestFileSystem().exists(outDir)) ; 785 786 assertTrue(job.waitForCompletion(true)); 787 } 788 789 /** 790 * Test for {@link HFileOutputFormat2#configureCompression(Configuration, HTableDescriptor)} and 791 * {@link HFileOutputFormat2#createFamilyCompressionMap(Configuration)}. 792 * Tests that the compression map is correctly serialized into 793 * and deserialized from configuration 794 * 795 * @throws IOException 796 */ 797 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 798 public void testSerializeDeserializeFamilyCompressionMap() throws IOException { 799 for (int numCfs = 0; numCfs <= 3; numCfs++) { 800 Configuration conf = new Configuration(this.util.getConfiguration()); 801 Map<String, Compression.Algorithm> familyToCompression = 802 getMockColumnFamiliesForCompression(numCfs); 803 Table table = Mockito.mock(Table.class); 804 setupMockColumnFamiliesForCompression(table, familyToCompression); 805 conf.set(HFileOutputFormat2.COMPRESSION_FAMILIES_CONF_KEY, 806 HFileOutputFormat2.serializeColumnFamilyAttribute 807 (HFileOutputFormat2.compressionDetails, 808 Arrays.asList(table.getTableDescriptor()))); 809 810 // read back family specific compression setting from the configuration 811 Map<byte[], Algorithm> retrievedFamilyToCompressionMap = HFileOutputFormat2 812 .createFamilyCompressionMap(conf); 813 814 // test that we have a value for all column families that matches with the 815 // used mock values 816 for (Entry<String, Algorithm> entry : familyToCompression.entrySet()) { 817 assertEquals("Compression configuration incorrect for column family:" 818 + entry.getKey(), entry.getValue(), 819 retrievedFamilyToCompressionMap.get(entry.getKey().getBytes("UTF-8"))); 820 } 821 } 822 } 823 824 private void setupMockColumnFamiliesForCompression(Table table, 825 Map<String, Compression.Algorithm> familyToCompression) throws IOException { 826 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]); 827 for (Entry<String, Compression.Algorithm> entry : familyToCompression.entrySet()) { 828 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey()) 829 .setMaxVersions(1) 830 .setCompressionType(entry.getValue()) 831 .setBlockCacheEnabled(false) 832 .setTimeToLive(0)); 833 } 834 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); 835 } 836 837 /** 838 * @return a map from column family names to compression algorithms for 839 * testing column family compression. Column family names have special characters 840 */ 841 private Map<String, Compression.Algorithm> 842 getMockColumnFamiliesForCompression (int numCfs) { 843 Map<String, Compression.Algorithm> familyToCompression = new HashMap<>(); 844 // use column family names having special characters 845 if (numCfs-- > 0) { 846 familyToCompression.put("Family1!@#!@#&", Compression.Algorithm.LZO); 847 } 848 if (numCfs-- > 0) { 849 familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.SNAPPY); 850 } 851 if (numCfs-- > 0) { 852 familyToCompression.put("Family2=asdads&!AASD", Compression.Algorithm.GZ); 853 } 854 if (numCfs-- > 0) { 855 familyToCompression.put("Family3", Compression.Algorithm.NONE); 856 } 857 return familyToCompression; 858 } 859 860 861 /** 862 * Test for {@link HFileOutputFormat2#configureBloomType(HTableDescriptor, Configuration)} and 863 * {@link HFileOutputFormat2#createFamilyBloomTypeMap(Configuration)}. 864 * Tests that the compression map is correctly serialized into 865 * and deserialized from configuration 866 * 867 * @throws IOException 868 */ 869 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 870 public void testSerializeDeserializeFamilyBloomTypeMap() throws IOException { 871 for (int numCfs = 0; numCfs <= 2; numCfs++) { 872 Configuration conf = new Configuration(this.util.getConfiguration()); 873 Map<String, BloomType> familyToBloomType = 874 getMockColumnFamiliesForBloomType(numCfs); 875 Table table = Mockito.mock(Table.class); 876 setupMockColumnFamiliesForBloomType(table, 877 familyToBloomType); 878 conf.set(HFileOutputFormat2.BLOOM_TYPE_FAMILIES_CONF_KEY, 879 HFileOutputFormat2.serializeColumnFamilyAttribute(HFileOutputFormat2.bloomTypeDetails, 880 Arrays.asList(table.getTableDescriptor()))); 881 882 // read back family specific data block encoding settings from the 883 // configuration 884 Map<byte[], BloomType> retrievedFamilyToBloomTypeMap = 885 HFileOutputFormat2 886 .createFamilyBloomTypeMap(conf); 887 888 // test that we have a value for all column families that matches with the 889 // used mock values 890 for (Entry<String, BloomType> entry : familyToBloomType.entrySet()) { 891 assertEquals("BloomType configuration incorrect for column family:" 892 + entry.getKey(), entry.getValue(), 893 retrievedFamilyToBloomTypeMap.get(entry.getKey().getBytes("UTF-8"))); 894 } 895 } 896 } 897 898 private void setupMockColumnFamiliesForBloomType(Table table, 899 Map<String, BloomType> familyToDataBlockEncoding) throws IOException { 900 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]); 901 for (Entry<String, BloomType> entry : familyToDataBlockEncoding.entrySet()) { 902 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey()) 903 .setMaxVersions(1) 904 .setBloomFilterType(entry.getValue()) 905 .setBlockCacheEnabled(false) 906 .setTimeToLive(0)); 907 } 908 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); 909 } 910 911 /** 912 * @return a map from column family names to compression algorithms for 913 * testing column family compression. Column family names have special characters 914 */ 915 private Map<String, BloomType> 916 getMockColumnFamiliesForBloomType (int numCfs) { 917 Map<String, BloomType> familyToBloomType = new HashMap<>(); 918 // use column family names having special characters 919 if (numCfs-- > 0) { 920 familyToBloomType.put("Family1!@#!@#&", BloomType.ROW); 921 } 922 if (numCfs-- > 0) { 923 familyToBloomType.put("Family2=asdads&!AASD", 924 BloomType.ROWCOL); 925 } 926 if (numCfs-- > 0) { 927 familyToBloomType.put("Family3", BloomType.NONE); 928 } 929 return familyToBloomType; 930 } 931 932 /** 933 * Test for {@link HFileOutputFormat2#configureBlockSize(HTableDescriptor, Configuration)} and 934 * {@link HFileOutputFormat2#createFamilyBlockSizeMap(Configuration)}. 935 * Tests that the compression map is correctly serialized into 936 * and deserialized from configuration 937 * 938 * @throws IOException 939 */ 940 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 941 public void testSerializeDeserializeFamilyBlockSizeMap() throws IOException { 942 for (int numCfs = 0; numCfs <= 3; numCfs++) { 943 Configuration conf = new Configuration(this.util.getConfiguration()); 944 Map<String, Integer> familyToBlockSize = 945 getMockColumnFamiliesForBlockSize(numCfs); 946 Table table = Mockito.mock(Table.class); 947 setupMockColumnFamiliesForBlockSize(table, 948 familyToBlockSize); 949 conf.set(HFileOutputFormat2.BLOCK_SIZE_FAMILIES_CONF_KEY, 950 HFileOutputFormat2.serializeColumnFamilyAttribute 951 (HFileOutputFormat2.blockSizeDetails, Arrays.asList(table 952 .getTableDescriptor()))); 953 954 // read back family specific data block encoding settings from the 955 // configuration 956 Map<byte[], Integer> retrievedFamilyToBlockSizeMap = 957 HFileOutputFormat2 958 .createFamilyBlockSizeMap(conf); 959 960 // test that we have a value for all column families that matches with the 961 // used mock values 962 for (Entry<String, Integer> entry : familyToBlockSize.entrySet() 963 ) { 964 assertEquals("BlockSize configuration incorrect for column family:" 965 + entry.getKey(), entry.getValue(), 966 retrievedFamilyToBlockSizeMap.get(entry.getKey().getBytes("UTF-8"))); 967 } 968 } 969 } 970 971 private void setupMockColumnFamiliesForBlockSize(Table table, 972 Map<String, Integer> familyToDataBlockEncoding) throws IOException { 973 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]); 974 for (Entry<String, Integer> entry : familyToDataBlockEncoding.entrySet()) { 975 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey()) 976 .setMaxVersions(1) 977 .setBlocksize(entry.getValue()) 978 .setBlockCacheEnabled(false) 979 .setTimeToLive(0)); 980 } 981 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); 982 } 983 984 /** 985 * @return a map from column family names to compression algorithms for 986 * testing column family compression. Column family names have special characters 987 */ 988 private Map<String, Integer> 989 getMockColumnFamiliesForBlockSize (int numCfs) { 990 Map<String, Integer> familyToBlockSize = new HashMap<>(); 991 // use column family names having special characters 992 if (numCfs-- > 0) { 993 familyToBlockSize.put("Family1!@#!@#&", 1234); 994 } 995 if (numCfs-- > 0) { 996 familyToBlockSize.put("Family2=asdads&!AASD", 997 Integer.MAX_VALUE); 998 } 999 if (numCfs-- > 0) { 1000 familyToBlockSize.put("Family2=asdads&!AASD", 1001 Integer.MAX_VALUE); 1002 } 1003 if (numCfs-- > 0) { 1004 familyToBlockSize.put("Family3", 0); 1005 } 1006 return familyToBlockSize; 1007 } 1008 1009 /** 1010 * Test for {@link HFileOutputFormat2#configureDataBlockEncoding(HTableDescriptor, Configuration)} 1011 * and {@link HFileOutputFormat2#createFamilyDataBlockEncodingMap(Configuration)}. 1012 * Tests that the compression map is correctly serialized into 1013 * and deserialized from configuration 1014 * 1015 * @throws IOException 1016 */ 1017 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 1018 public void testSerializeDeserializeFamilyDataBlockEncodingMap() throws IOException { 1019 for (int numCfs = 0; numCfs <= 3; numCfs++) { 1020 Configuration conf = new Configuration(this.util.getConfiguration()); 1021 Map<String, DataBlockEncoding> familyToDataBlockEncoding = 1022 getMockColumnFamiliesForDataBlockEncoding(numCfs); 1023 Table table = Mockito.mock(Table.class); 1024 setupMockColumnFamiliesForDataBlockEncoding(table, 1025 familyToDataBlockEncoding); 1026 HTableDescriptor tableDescriptor = table.getTableDescriptor(); 1027 conf.set(HFileOutputFormat2.DATABLOCK_ENCODING_FAMILIES_CONF_KEY, 1028 HFileOutputFormat2.serializeColumnFamilyAttribute 1029 (HFileOutputFormat2.dataBlockEncodingDetails, Arrays 1030 .asList(tableDescriptor))); 1031 1032 // read back family specific data block encoding settings from the 1033 // configuration 1034 Map<byte[], DataBlockEncoding> retrievedFamilyToDataBlockEncodingMap = 1035 HFileOutputFormat2 1036 .createFamilyDataBlockEncodingMap(conf); 1037 1038 // test that we have a value for all column families that matches with the 1039 // used mock values 1040 for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) { 1041 assertEquals("DataBlockEncoding configuration incorrect for column family:" 1042 + entry.getKey(), entry.getValue(), 1043 retrievedFamilyToDataBlockEncodingMap.get(entry.getKey().getBytes("UTF-8"))); 1044 } 1045 } 1046 } 1047 1048 private void setupMockColumnFamiliesForDataBlockEncoding(Table table, 1049 Map<String, DataBlockEncoding> familyToDataBlockEncoding) throws IOException { 1050 HTableDescriptor mockTableDescriptor = new HTableDescriptor(TABLE_NAMES[0]); 1051 for (Entry<String, DataBlockEncoding> entry : familyToDataBlockEncoding.entrySet()) { 1052 mockTableDescriptor.addFamily(new HColumnDescriptor(entry.getKey()) 1053 .setMaxVersions(1) 1054 .setDataBlockEncoding(entry.getValue()) 1055 .setBlockCacheEnabled(false) 1056 .setTimeToLive(0)); 1057 } 1058 Mockito.doReturn(mockTableDescriptor).when(table).getTableDescriptor(); 1059 } 1060 1061 /** 1062 * @return a map from column family names to compression algorithms for 1063 * testing column family compression. Column family names have special characters 1064 */ 1065 private Map<String, DataBlockEncoding> 1066 getMockColumnFamiliesForDataBlockEncoding (int numCfs) { 1067 Map<String, DataBlockEncoding> familyToDataBlockEncoding = new HashMap<>(); 1068 // use column family names having special characters 1069 if (numCfs-- > 0) { 1070 familyToDataBlockEncoding.put("Family1!@#!@#&", DataBlockEncoding.DIFF); 1071 } 1072 if (numCfs-- > 0) { 1073 familyToDataBlockEncoding.put("Family2=asdads&!AASD", 1074 DataBlockEncoding.FAST_DIFF); 1075 } 1076 if (numCfs-- > 0) { 1077 familyToDataBlockEncoding.put("Family2=asdads&!AASD", 1078 DataBlockEncoding.PREFIX); 1079 } 1080 if (numCfs-- > 0) { 1081 familyToDataBlockEncoding.put("Family3", DataBlockEncoding.NONE); 1082 } 1083 return familyToDataBlockEncoding; 1084 } 1085 1086 private void setupMockStartKeys(RegionLocator table) throws IOException { 1087 byte[][] mockKeys = new byte[][] { 1088 HConstants.EMPTY_BYTE_ARRAY, 1089 Bytes.toBytes("aaa"), 1090 Bytes.toBytes("ggg"), 1091 Bytes.toBytes("zzz") 1092 }; 1093 Mockito.doReturn(mockKeys).when(table).getStartKeys(); 1094 } 1095 1096 private void setupMockTableName(RegionLocator table) throws IOException { 1097 TableName mockTableName = TableName.valueOf("mock_table"); 1098 Mockito.doReturn(mockTableName).when(table).getName(); 1099 } 1100 1101 /** 1102 * Test that {@link HFileOutputFormat2} RecordWriter uses compression and 1103 * bloom filter settings from the column family descriptor 1104 */ 1105 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 1106 public void testColumnFamilySettings() throws Exception { 1107 Configuration conf = new Configuration(this.util.getConfiguration()); 1108 RecordWriter<ImmutableBytesWritable, Cell> writer = null; 1109 TaskAttemptContext context = null; 1110 Path dir = util.getDataTestDir("testColumnFamilySettings"); 1111 1112 // Setup table descriptor 1113 Table table = Mockito.mock(Table.class); 1114 RegionLocator regionLocator = Mockito.mock(RegionLocator.class); 1115 HTableDescriptor htd = new HTableDescriptor(TABLE_NAMES[0]); 1116 Mockito.doReturn(htd).when(table).getTableDescriptor(); 1117 for (HColumnDescriptor hcd: HBaseTestingUtility.generateColumnDescriptors()) { 1118 htd.addFamily(hcd); 1119 } 1120 1121 // set up the table to return some mock keys 1122 setupMockStartKeys(regionLocator); 1123 1124 try { 1125 // partial map red setup to get an operational writer for testing 1126 // We turn off the sequence file compression, because DefaultCodec 1127 // pollutes the GZip codec pool with an incompatible compressor. 1128 conf.set("io.seqfile.compression.type", "NONE"); 1129 conf.set("hbase.fs.tmp.dir", dir.toString()); 1130 // turn locality off to eliminate getRegionLocation fail-and-retry time when writing kvs 1131 conf.setBoolean(HFileOutputFormat2.LOCALITY_SENSITIVE_CONF_KEY, false); 1132 1133 Job job = new Job(conf, "testLocalMRIncrementalLoad"); 1134 job.setWorkingDirectory(util.getDataTestDirOnTestFS("testColumnFamilySettings")); 1135 setupRandomGeneratorMapper(job, false); 1136 HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator); 1137 FileOutputFormat.setOutputPath(job, dir); 1138 context = createTestTaskAttemptContext(job); 1139 HFileOutputFormat2 hof = new HFileOutputFormat2(); 1140 writer = hof.getRecordWriter(context); 1141 1142 // write out random rows 1143 writeRandomKeyValues(writer, context, htd.getFamiliesKeys(), ROWSPERSPLIT); 1144 writer.close(context); 1145 1146 // Make sure that a directory was created for every CF 1147 FileSystem fs = dir.getFileSystem(conf); 1148 1149 // commit so that the filesystem has one directory per column family 1150 hof.getOutputCommitter(context).commitTask(context); 1151 hof.getOutputCommitter(context).commitJob(context); 1152 FileStatus[] families = FSUtils.listStatus(fs, dir, new FSUtils.FamilyDirFilter(fs)); 1153 assertEquals(htd.getFamilies().size(), families.length); 1154 for (FileStatus f : families) { 1155 String familyStr = f.getPath().getName(); 1156 HColumnDescriptor hcd = htd.getFamily(Bytes.toBytes(familyStr)); 1157 // verify that the compression on this file matches the configured 1158 // compression 1159 Path dataFilePath = fs.listStatus(f.getPath())[0].getPath(); 1160 Reader reader = HFile.createReader(fs, dataFilePath, new CacheConfig(conf), true, conf); 1161 Map<byte[], byte[]> fileInfo = reader.loadFileInfo(); 1162 1163 byte[] bloomFilter = fileInfo.get(BLOOM_FILTER_TYPE_KEY); 1164 if (bloomFilter == null) bloomFilter = Bytes.toBytes("NONE"); 1165 assertEquals("Incorrect bloom filter used for column family " + familyStr + 1166 "(reader: " + reader + ")", 1167 hcd.getBloomFilterType(), BloomType.valueOf(Bytes.toString(bloomFilter))); 1168 assertEquals("Incorrect compression used for column family " + familyStr + 1169 "(reader: " + reader + ")", hcd.getCompressionType(), reader.getFileContext().getCompression()); 1170 } 1171 } finally { 1172 dir.getFileSystem(conf).delete(dir, true); 1173 } 1174 } 1175 1176 /** 1177 * Write random values to the writer assuming a table created using 1178 * {@link #FAMILIES} as column family descriptors 1179 */ 1180 private void writeRandomKeyValues(RecordWriter<ImmutableBytesWritable, Cell> writer, 1181 TaskAttemptContext context, Set<byte[]> families, int numRows) 1182 throws IOException, InterruptedException { 1183 byte keyBytes[] = new byte[Bytes.SIZEOF_INT]; 1184 int valLength = 10; 1185 byte valBytes[] = new byte[valLength]; 1186 1187 int taskId = context.getTaskAttemptID().getTaskID().getId(); 1188 assert taskId < Byte.MAX_VALUE : "Unit tests dont support > 127 tasks!"; 1189 final byte [] qualifier = Bytes.toBytes("data"); 1190 Random random = new Random(); 1191 for (int i = 0; i < numRows; i++) { 1192 1193 Bytes.putInt(keyBytes, 0, i); 1194 random.nextBytes(valBytes); 1195 ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes); 1196 1197 for (byte[] family : families) { 1198 Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes); 1199 writer.write(key, kv); 1200 } 1201 } 1202 } 1203 1204 /** 1205 * This test is to test the scenario happened in HBASE-6901. 1206 * All files are bulk loaded and excluded from minor compaction. 1207 * Without the fix of HBASE-6901, an ArrayIndexOutOfBoundsException 1208 * will be thrown. 1209 */ 1210 @Ignore ("Flakey: See HBASE-9051") @Test 1211 public void testExcludeAllFromMinorCompaction() throws Exception { 1212 Configuration conf = util.getConfiguration(); 1213 conf.setInt("hbase.hstore.compaction.min", 2); 1214 generateRandomStartKeys(5); 1215 1216 util.startMiniCluster(); 1217 try (Connection conn = ConnectionFactory.createConnection(); 1218 Admin admin = conn.getAdmin(); 1219 Table table = util.createTable(TABLE_NAMES[0], FAMILIES); 1220 RegionLocator locator = conn.getRegionLocator(TABLE_NAMES[0])) { 1221 final FileSystem fs = util.getDFSCluster().getFileSystem(); 1222 assertEquals("Should start with empty table", 0, util.countRows(table)); 1223 1224 // deep inspection: get the StoreFile dir 1225 final Path storePath = new Path( 1226 FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]), 1227 new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(), 1228 Bytes.toString(FAMILIES[0]))); 1229 assertEquals(0, fs.listStatus(storePath).length); 1230 1231 // Generate two bulk load files 1232 conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", 1233 true); 1234 1235 for (int i = 0; i < 2; i++) { 1236 Path testDir = util.getDataTestDirOnTestFS("testExcludeAllFromMinorCompaction_" + i); 1237 runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table 1238 .getTableDescriptor(), conn.getRegionLocator(TABLE_NAMES[0]))), testDir, false); 1239 // Perform the actual load 1240 new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, locator); 1241 } 1242 1243 // Ensure data shows up 1244 int expectedRows = 2 * NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; 1245 assertEquals("LoadIncrementalHFiles should put expected data in table", 1246 expectedRows, util.countRows(table)); 1247 1248 // should have a second StoreFile now 1249 assertEquals(2, fs.listStatus(storePath).length); 1250 1251 // minor compactions shouldn't get rid of the file 1252 admin.compact(TABLE_NAMES[0]); 1253 try { 1254 quickPoll(new Callable<Boolean>() { 1255 @Override 1256 public Boolean call() throws Exception { 1257 List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]); 1258 for (HRegion region : regions) { 1259 for (HStore store : region.getStores()) { 1260 store.closeAndArchiveCompactedFiles(); 1261 } 1262 } 1263 return fs.listStatus(storePath).length == 1; 1264 } 1265 }, 5000); 1266 throw new IOException("SF# = " + fs.listStatus(storePath).length); 1267 } catch (AssertionError ae) { 1268 // this is expected behavior 1269 } 1270 1271 // a major compaction should work though 1272 admin.majorCompact(TABLE_NAMES[0]); 1273 quickPoll(new Callable<Boolean>() { 1274 @Override 1275 public Boolean call() throws Exception { 1276 List<HRegion> regions = util.getMiniHBaseCluster().getRegions(TABLE_NAMES[0]); 1277 for (HRegion region : regions) { 1278 for (HStore store : region.getStores()) { 1279 store.closeAndArchiveCompactedFiles(); 1280 } 1281 } 1282 return fs.listStatus(storePath).length == 1; 1283 } 1284 }, 5000); 1285 1286 } finally { 1287 util.shutdownMiniCluster(); 1288 } 1289 } 1290 1291 @Ignore("Goes zombie too frequently; needs work. See HBASE-14563") @Test 1292 public void testExcludeMinorCompaction() throws Exception { 1293 Configuration conf = util.getConfiguration(); 1294 conf.setInt("hbase.hstore.compaction.min", 2); 1295 generateRandomStartKeys(5); 1296 1297 util.startMiniCluster(); 1298 try (Connection conn = ConnectionFactory.createConnection(conf); 1299 Admin admin = conn.getAdmin()){ 1300 Path testDir = util.getDataTestDirOnTestFS("testExcludeMinorCompaction"); 1301 final FileSystem fs = util.getDFSCluster().getFileSystem(); 1302 Table table = util.createTable(TABLE_NAMES[0], FAMILIES); 1303 assertEquals("Should start with empty table", 0, util.countRows(table)); 1304 1305 // deep inspection: get the StoreFile dir 1306 final Path storePath = new Path( 1307 FSUtils.getTableDir(FSUtils.getRootDir(conf), TABLE_NAMES[0]), 1308 new Path(admin.getTableRegions(TABLE_NAMES[0]).get(0).getEncodedName(), 1309 Bytes.toString(FAMILIES[0]))); 1310 assertEquals(0, fs.listStatus(storePath).length); 1311 1312 // put some data in it and flush to create a storefile 1313 Put p = new Put(Bytes.toBytes("test")); 1314 p.addColumn(FAMILIES[0], Bytes.toBytes("1"), Bytes.toBytes("1")); 1315 table.put(p); 1316 admin.flush(TABLE_NAMES[0]); 1317 assertEquals(1, util.countRows(table)); 1318 quickPoll(new Callable<Boolean>() { 1319 @Override 1320 public Boolean call() throws Exception { 1321 return fs.listStatus(storePath).length == 1; 1322 } 1323 }, 5000); 1324 1325 // Generate a bulk load file with more rows 1326 conf.setBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude", 1327 true); 1328 1329 RegionLocator regionLocator = conn.getRegionLocator(TABLE_NAMES[0]); 1330 runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(table 1331 .getTableDescriptor(), regionLocator)), testDir, false); 1332 1333 // Perform the actual load 1334 new LoadIncrementalHFiles(conf).doBulkLoad(testDir, admin, table, regionLocator); 1335 1336 // Ensure data shows up 1337 int expectedRows = NMapInputFormat.getNumMapTasks(conf) * ROWSPERSPLIT; 1338 assertEquals("LoadIncrementalHFiles should put expected data in table", 1339 expectedRows + 1, util.countRows(table)); 1340 1341 // should have a second StoreFile now 1342 assertEquals(2, fs.listStatus(storePath).length); 1343 1344 // minor compactions shouldn't get rid of the file 1345 admin.compact(TABLE_NAMES[0]); 1346 try { 1347 quickPoll(new Callable<Boolean>() { 1348 @Override 1349 public Boolean call() throws Exception { 1350 return fs.listStatus(storePath).length == 1; 1351 } 1352 }, 5000); 1353 throw new IOException("SF# = " + fs.listStatus(storePath).length); 1354 } catch (AssertionError ae) { 1355 // this is expected behavior 1356 } 1357 1358 // a major compaction should work though 1359 admin.majorCompact(TABLE_NAMES[0]); 1360 quickPoll(new Callable<Boolean>() { 1361 @Override 1362 public Boolean call() throws Exception { 1363 return fs.listStatus(storePath).length == 1; 1364 } 1365 }, 5000); 1366 1367 } finally { 1368 util.shutdownMiniCluster(); 1369 } 1370 } 1371 1372 private void quickPoll(Callable<Boolean> c, int waitMs) throws Exception { 1373 int sleepMs = 10; 1374 int retries = (int) Math.ceil(((double) waitMs) / sleepMs); 1375 while (retries-- > 0) { 1376 if (c.call().booleanValue()) { 1377 return; 1378 } 1379 Thread.sleep(sleepMs); 1380 } 1381 fail(); 1382 } 1383 1384 public static void main(String args[]) throws Exception { 1385 new TestHFileOutputFormat2().manualTest(args); 1386 } 1387 1388 public void manualTest(String args[]) throws Exception { 1389 Configuration conf = HBaseConfiguration.create(); 1390 util = new HBaseTestingUtility(conf); 1391 if ("newtable".equals(args[0])) { 1392 TableName tname = TableName.valueOf(args[1]); 1393 byte[][] splitKeys = generateRandomSplitKeys(4); 1394 Table table = util.createTable(tname, FAMILIES, splitKeys); 1395 } else if ("incremental".equals(args[0])) { 1396 TableName tname = TableName.valueOf(args[1]); 1397 try(Connection c = ConnectionFactory.createConnection(conf); 1398 Admin admin = c.getAdmin(); 1399 RegionLocator regionLocator = c.getRegionLocator(tname)) { 1400 Path outDir = new Path("incremental-out"); 1401 runIncrementalPELoad(conf, Arrays.asList(new HFileOutputFormat2.TableInfo(admin 1402 .getTableDescriptor(tname), regionLocator)), outDir, false); 1403 } 1404 } else { 1405 throw new RuntimeException( 1406 "usage: TestHFileOutputFormat2 newtable | incremental"); 1407 } 1408 } 1409 1410 @Test 1411 public void testBlockStoragePolicy() throws Exception { 1412 util = new HBaseTestingUtility(); 1413 Configuration conf = util.getConfiguration(); 1414 conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY, "ALL_SSD"); 1415 1416 conf.set(HFileOutputFormat2.STORAGE_POLICY_PROPERTY_CF_PREFIX + 1417 Bytes.toString(HFileOutputFormat2.combineTableNameSuffix( 1418 TABLE_NAMES[0].getName(), FAMILIES[0])), "ONE_SSD"); 1419 Path cf1Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[0])); 1420 Path cf2Dir = new Path(util.getDataTestDir(), Bytes.toString(FAMILIES[1])); 1421 util.startMiniDFSCluster(3); 1422 FileSystem fs = util.getDFSCluster().getFileSystem(); 1423 try { 1424 fs.mkdirs(cf1Dir); 1425 fs.mkdirs(cf2Dir); 1426 1427 // the original block storage policy would be HOT 1428 String spA = getStoragePolicyName(fs, cf1Dir); 1429 String spB = getStoragePolicyName(fs, cf2Dir); 1430 LOG.debug("Storage policy of cf 0: [" + spA + "]."); 1431 LOG.debug("Storage policy of cf 1: [" + spB + "]."); 1432 assertEquals("HOT", spA); 1433 assertEquals("HOT", spB); 1434 1435 // alter table cf schema to change storage policies 1436 HFileOutputFormat2.configureStoragePolicy(conf, fs, 1437 HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[0]), cf1Dir); 1438 HFileOutputFormat2.configureStoragePolicy(conf, fs, 1439 HFileOutputFormat2.combineTableNameSuffix(TABLE_NAMES[0].getName(), FAMILIES[1]), cf2Dir); 1440 spA = getStoragePolicyName(fs, cf1Dir); 1441 spB = getStoragePolicyName(fs, cf2Dir); 1442 LOG.debug("Storage policy of cf 0: [" + spA + "]."); 1443 LOG.debug("Storage policy of cf 1: [" + spB + "]."); 1444 assertNotNull(spA); 1445 assertEquals("ONE_SSD", spA); 1446 assertNotNull(spB); 1447 assertEquals("ALL_SSD", spB); 1448 } finally { 1449 fs.delete(cf1Dir, true); 1450 fs.delete(cf2Dir, true); 1451 util.shutdownMiniDFSCluster(); 1452 } 1453 } 1454 1455 private String getStoragePolicyName(FileSystem fs, Path path) { 1456 try { 1457 Object blockStoragePolicySpi = ReflectionUtils.invokeMethod(fs, "getStoragePolicy", path); 1458 return (String) ReflectionUtils.invokeMethod(blockStoragePolicySpi, "getName"); 1459 } catch (Exception e) { 1460 // Maybe fail because of using old HDFS version, try the old way 1461 if (LOG.isTraceEnabled()) { 1462 LOG.trace("Failed to get policy directly", e); 1463 } 1464 String policy = getStoragePolicyNameForOldHDFSVersion(fs, path); 1465 return policy == null ? "HOT" : policy;// HOT by default 1466 } 1467 } 1468 1469 private String getStoragePolicyNameForOldHDFSVersion(FileSystem fs, Path path) { 1470 try { 1471 if (fs instanceof DistributedFileSystem) { 1472 DistributedFileSystem dfs = (DistributedFileSystem) fs; 1473 HdfsFileStatus status = dfs.getClient().getFileInfo(path.toUri().getPath()); 1474 if (null != status) { 1475 byte storagePolicyId = status.getStoragePolicy(); 1476 Field idUnspecified = BlockStoragePolicySuite.class.getField("ID_UNSPECIFIED"); 1477 if (storagePolicyId != idUnspecified.getByte(BlockStoragePolicySuite.class)) { 1478 BlockStoragePolicy[] policies = dfs.getStoragePolicies(); 1479 for (BlockStoragePolicy policy : policies) { 1480 if (policy.getId() == storagePolicyId) { 1481 return policy.getName(); 1482 } 1483 } 1484 } 1485 } 1486 } 1487 } catch (Throwable e) { 1488 LOG.warn("failed to get block storage policy of [" + path + "]", e); 1489 } 1490 1491 return null; 1492 } 1493} 1494