001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.io.InterruptedIOException; 023import java.util.ArrayList; 024import java.util.Collection; 025import java.util.List; 026import java.util.Objects; 027import java.util.Optional; 028import java.util.UUID; 029import org.apache.hadoop.conf.Configuration; 030import org.apache.hadoop.fs.FSDataInputStream; 031import org.apache.hadoop.fs.FSDataOutputStream; 032import org.apache.hadoop.fs.FileStatus; 033import org.apache.hadoop.fs.FileSystem; 034import org.apache.hadoop.fs.FileUtil; 035import org.apache.hadoop.fs.LocatedFileStatus; 036import org.apache.hadoop.fs.Path; 037import org.apache.hadoop.fs.permission.FsPermission; 038import org.apache.hadoop.hbase.Cell; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.PrivateCellUtil; 041import org.apache.hadoop.hbase.backup.HFileArchiver; 042import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 043import org.apache.hadoop.hbase.client.RegionInfo; 044import org.apache.hadoop.hbase.client.TableDescriptor; 045import org.apache.hadoop.hbase.fs.HFileSystem; 046import org.apache.hadoop.hbase.io.Reference; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.FSHDFSUtils; 049import org.apache.hadoop.hbase.util.FSUtils; 050import org.apache.hadoop.hbase.util.Pair; 051import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 052import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 053import org.apache.yetus.audience.InterfaceAudience; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 057 058import edu.umd.cs.findbugs.annotations.Nullable; 059 060/** 061 * View to an on-disk Region. 062 * Provides the set of methods necessary to interact with the on-disk region data. 063 */ 064@InterfaceAudience.Private 065public class HRegionFileSystem { 066 private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class); 067 068 /** Name of the region info file that resides just under the region directory. */ 069 public final static String REGION_INFO_FILE = ".regioninfo"; 070 071 /** Temporary subdirectory of the region directory used for merges. */ 072 public static final String REGION_MERGES_DIR = ".merges"; 073 074 /** Temporary subdirectory of the region directory used for splits. */ 075 public static final String REGION_SPLITS_DIR = ".splits"; 076 077 /** Temporary subdirectory of the region directory used for compaction output. */ 078 @VisibleForTesting static final String REGION_TEMP_DIR = ".tmp"; 079 080 private final RegionInfo regionInfo; 081 //regionInfo for interacting with FS (getting encodedName, etc) 082 private final RegionInfo regionInfoForFs; 083 private final Configuration conf; 084 private final Path tableDir; 085 private final FileSystem fs; 086 private final Path regionDir; 087 088 /** 089 * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the 090 * client level. 091 */ 092 private final int hdfsClientRetriesNumber; 093 private final int baseSleepBeforeRetries; 094 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10; 095 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000; 096 097 /** 098 * Create a view to the on-disk region 099 * @param conf the {@link Configuration} to use 100 * @param fs {@link FileSystem} that contains the region 101 * @param tableDir {@link Path} to where the table is being stored 102 * @param regionInfo {@link RegionInfo} for region 103 */ 104 HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir, 105 final RegionInfo regionInfo) { 106 this.fs = fs; 107 this.conf = conf; 108 this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null"); 109 this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null"); 110 this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo); 111 this.regionDir = FSUtils.getRegionDir(tableDir, regionInfo); 112 this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number", 113 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 114 this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries", 115 DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 116 } 117 118 /** @return the underlying {@link FileSystem} */ 119 public FileSystem getFileSystem() { 120 return this.fs; 121 } 122 123 /** @return the {@link RegionInfo} that describe this on-disk region view */ 124 public RegionInfo getRegionInfo() { 125 return this.regionInfo; 126 } 127 128 public RegionInfo getRegionInfoForFS() { 129 return this.regionInfoForFs; 130 } 131 132 /** @return {@link Path} to the region's root directory. */ 133 public Path getTableDir() { 134 return this.tableDir; 135 } 136 137 /** @return {@link Path} to the region directory. */ 138 public Path getRegionDir() { 139 return regionDir; 140 } 141 142 // =========================================================================== 143 // Temp Helpers 144 // =========================================================================== 145 /** @return {@link Path} to the region's temp directory, used for file creations */ 146 Path getTempDir() { 147 return new Path(getRegionDir(), REGION_TEMP_DIR); 148 } 149 150 /** 151 * Clean up any temp detritus that may have been left around from previous operation attempts. 152 */ 153 void cleanupTempDir() throws IOException { 154 deleteDir(getTempDir()); 155 } 156 157 // =========================================================================== 158 // Store/StoreFile Helpers 159 // =========================================================================== 160 /** 161 * Returns the directory path of the specified family 162 * @param familyName Column Family Name 163 * @return {@link Path} to the directory of the specified family 164 */ 165 public Path getStoreDir(final String familyName) { 166 return new Path(this.getRegionDir(), familyName); 167 } 168 169 /** 170 * Create the store directory for the specified family name 171 * @param familyName Column Family Name 172 * @return {@link Path} to the directory of the specified family 173 * @throws IOException if the directory creation fails. 174 */ 175 Path createStoreDir(final String familyName) throws IOException { 176 Path storeDir = getStoreDir(familyName); 177 if(!fs.exists(storeDir) && !createDir(storeDir)) 178 throw new IOException("Failed creating "+storeDir); 179 return storeDir; 180 } 181 182 /** 183 * Set the directory of CF to the specified storage policy. <br> 184 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 185 * <i>"COLD"</i> <br> 186 * <br> 187 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 188 * @param familyName The name of column family. 189 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. 190 * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 191 * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 192 */ 193 public void setStoragePolicy(String familyName, String policyName) { 194 FSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName); 195 } 196 197 /** 198 * Get the storage policy of the directory of CF. 199 * @param familyName The name of column family. 200 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception 201 * thrown when trying to get policy 202 */ 203 @Nullable 204 public String getStoragePolicyName(String familyName) { 205 if (this.fs instanceof HFileSystem) { 206 Path storeDir = getStoreDir(familyName); 207 return ((HFileSystem) this.fs).getStoragePolicyName(storeDir); 208 } 209 210 return null; 211 } 212 213 /** 214 * Returns the store files available for the family. 215 * This methods performs the filtering based on the valid store files. 216 * @param familyName Column Family Name 217 * @return a set of {@link StoreFileInfo} for the specified family. 218 */ 219 public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException { 220 return getStoreFiles(Bytes.toString(familyName)); 221 } 222 223 public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException { 224 return getStoreFiles(familyName, true); 225 } 226 227 /** 228 * Returns the store files available for the family. 229 * This methods performs the filtering based on the valid store files. 230 * @param familyName Column Family Name 231 * @return a set of {@link StoreFileInfo} for the specified family. 232 */ 233 public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate) 234 throws IOException { 235 Path familyDir = getStoreDir(familyName); 236 FileStatus[] files = FSUtils.listStatus(this.fs, familyDir); 237 if (files == null) { 238 if (LOG.isTraceEnabled()) { 239 LOG.trace("No StoreFiles for: " + familyDir); 240 } 241 return null; 242 } 243 244 ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length); 245 for (FileStatus status: files) { 246 if (validate && !StoreFileInfo.isValid(status)) { 247 LOG.warn("Invalid StoreFile: " + status.getPath()); 248 continue; 249 } 250 StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, 251 regionInfoForFs, familyName, status.getPath()); 252 storeFiles.add(info); 253 254 } 255 return storeFiles; 256 } 257 258 /** 259 * Returns the store files' LocatedFileStatus which available for the family. 260 * This methods performs the filtering based on the valid store files. 261 * @param familyName Column Family Name 262 * @return a list of store files' LocatedFileStatus for the specified family. 263 */ 264 public static List<LocatedFileStatus> getStoreFilesLocatedStatus( 265 final HRegionFileSystem regionfs, final String familyName, 266 final boolean validate) throws IOException { 267 Path familyDir = regionfs.getStoreDir(familyName); 268 List<LocatedFileStatus> locatedFileStatuses = FSUtils.listLocatedStatus( 269 regionfs.getFileSystem(), familyDir); 270 if (locatedFileStatuses == null) { 271 if (LOG.isTraceEnabled()) { 272 LOG.trace("No StoreFiles for: " + familyDir); 273 } 274 return null; 275 } 276 277 List<LocatedFileStatus> validStoreFiles = Lists.newArrayList(); 278 for (LocatedFileStatus status : locatedFileStatuses) { 279 if (validate && !StoreFileInfo.isValid(status)) { 280 LOG.warn("Invalid StoreFile: " + status.getPath()); 281 } else { 282 validStoreFiles.add(status); 283 } 284 } 285 return validStoreFiles; 286 } 287 288 /** 289 * Return Qualified Path of the specified family/file 290 * 291 * @param familyName Column Family Name 292 * @param fileName File Name 293 * @return The qualified Path for the specified family/file 294 */ 295 Path getStoreFilePath(final String familyName, final String fileName) { 296 Path familyDir = getStoreDir(familyName); 297 return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory()); 298 } 299 300 /** 301 * Return the store file information of the specified family/file. 302 * 303 * @param familyName Column Family Name 304 * @param fileName File Name 305 * @return The {@link StoreFileInfo} for the specified family/file 306 */ 307 StoreFileInfo getStoreFileInfo(final String familyName, final String fileName) 308 throws IOException { 309 Path familyDir = getStoreDir(familyName); 310 return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, 311 regionInfoForFs, familyName, new Path(familyDir, fileName)); 312 } 313 314 /** 315 * Returns true if the specified family has reference files 316 * @param familyName Column Family Name 317 * @return true if family contains reference files 318 * @throws IOException 319 */ 320 public boolean hasReferences(final String familyName) throws IOException { 321 Path storeDir = getStoreDir(familyName); 322 FileStatus[] files = FSUtils.listStatus(fs, storeDir); 323 if (files != null) { 324 for(FileStatus stat: files) { 325 if(stat.isDirectory()) { 326 continue; 327 } 328 if(StoreFileInfo.isReference(stat.getPath())) { 329 if (LOG.isTraceEnabled()) LOG.trace("Reference " + stat.getPath()); 330 return true; 331 } 332 } 333 } 334 return false; 335 } 336 337 /** 338 * Check whether region has Reference file 339 * @param htd table desciptor of the region 340 * @return true if region has reference file 341 * @throws IOException 342 */ 343 public boolean hasReferences(final TableDescriptor htd) throws IOException { 344 for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) { 345 if (hasReferences(family.getNameAsString())) { 346 return true; 347 } 348 } 349 return false; 350 } 351 352 /** 353 * @return the set of families present on disk 354 * @throws IOException 355 */ 356 public Collection<String> getFamilies() throws IOException { 357 FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs)); 358 if (fds == null) return null; 359 360 ArrayList<String> families = new ArrayList<>(fds.length); 361 for (FileStatus status: fds) { 362 families.add(status.getPath().getName()); 363 } 364 365 return families; 366 } 367 368 /** 369 * Remove the region family from disk, archiving the store files. 370 * @param familyName Column Family Name 371 * @throws IOException if an error occours during the archiving 372 */ 373 public void deleteFamily(final String familyName) throws IOException { 374 // archive family store files 375 HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName)); 376 377 // delete the family folder 378 Path familyDir = getStoreDir(familyName); 379 if(fs.exists(familyDir) && !deleteDir(familyDir)) 380 throw new IOException("Could not delete family " + familyName 381 + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "(" 382 + regionInfoForFs.getEncodedName() + ")"); 383 } 384 385 /** 386 * Generate a unique file name, used by createTempName() and commitStoreFile() 387 * @param suffix extra information to append to the generated name 388 * @return Unique file name 389 */ 390 private static String generateUniqueName(final String suffix) { 391 String name = UUID.randomUUID().toString().replaceAll("-", ""); 392 if (suffix != null) name += suffix; 393 return name; 394 } 395 396 /** 397 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() 398 * to get a safer file creation. 399 * <code> 400 * Path file = fs.createTempName(); 401 * ...StoreFile.Writer(file)... 402 * fs.commitStoreFile("family", file); 403 * </code> 404 * 405 * @return Unique {@link Path} of the temporary file 406 */ 407 public Path createTempName() { 408 return createTempName(null); 409 } 410 411 /** 412 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() 413 * to get a safer file creation. 414 * <code> 415 * Path file = fs.createTempName(); 416 * ...StoreFile.Writer(file)... 417 * fs.commitStoreFile("family", file); 418 * </code> 419 * 420 * @param suffix extra information to append to the generated name 421 * @return Unique {@link Path} of the temporary file 422 */ 423 public Path createTempName(final String suffix) { 424 return new Path(getTempDir(), generateUniqueName(suffix)); 425 } 426 427 /** 428 * Move the file from a build/temp location to the main family store directory. 429 * @param familyName Family that will gain the file 430 * @param buildPath {@link Path} to the file to commit. 431 * @return The new {@link Path} of the committed file 432 * @throws IOException 433 */ 434 public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException { 435 Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false); 436 return commitStoreFile(buildPath, dstPath); 437 } 438 439 /** 440 * Generate the filename in the main family store directory for moving the file from a build/temp 441 * location. 442 * @param familyName Family that will gain the file 443 * @param buildPath {@link Path} to the file to commit. 444 * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number) 445 * @param generateNewName False if you want to keep the buildPath name 446 * @return The new {@link Path} of the to be committed file 447 * @throws IOException 448 */ 449 private Path preCommitStoreFile(final String familyName, final Path buildPath, 450 final long seqNum, final boolean generateNewName) throws IOException { 451 Path storeDir = getStoreDir(familyName); 452 if(!fs.exists(storeDir) && !createDir(storeDir)) 453 throw new IOException("Failed creating " + storeDir); 454 455 String name = buildPath.getName(); 456 if (generateNewName) { 457 name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_"); 458 } 459 Path dstPath = new Path(storeDir, name); 460 if (!fs.exists(buildPath)) { 461 throw new FileNotFoundException(buildPath.toString()); 462 } 463 if (LOG.isDebugEnabled()) { 464 LOG.debug("Committing " + buildPath + " as " + dstPath); 465 } 466 return dstPath; 467 } 468 469 /* 470 * Moves file from staging dir to region dir 471 * @param buildPath {@link Path} to the file to commit. 472 * @param dstPath {@link Path} to the file under region dir 473 * @return The {@link Path} of the committed file 474 * @throws IOException 475 */ 476 Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException { 477 // buildPath exists, therefore not doing an exists() check. 478 if (!rename(buildPath, dstPath)) { 479 throw new IOException("Failed rename of " + buildPath + " to " + dstPath); 480 } 481 return dstPath; 482 } 483 484 /** 485 * Archives the specified store file from the specified family. 486 * @param familyName Family that contains the store files 487 * @param filePath {@link Path} to the store file to remove 488 * @throws IOException if the archiving fails 489 */ 490 public void removeStoreFile(final String familyName, final Path filePath) 491 throws IOException { 492 HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs, 493 this.tableDir, Bytes.toBytes(familyName), filePath); 494 } 495 496 /** 497 * Closes and archives the specified store files from the specified family. 498 * @param familyName Family that contains the store files 499 * @param storeFiles set of store files to remove 500 * @throws IOException if the archiving fails 501 */ 502 public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles) 503 throws IOException { 504 HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs, 505 this.tableDir, Bytes.toBytes(familyName), storeFiles); 506 } 507 508 /** 509 * Bulk load: Add a specified store file to the specified family. 510 * If the source file is on the same different file-system is moved from the 511 * source location to the destination location, otherwise is copied over. 512 * 513 * @param familyName Family that will gain the file 514 * @param srcPath {@link Path} to the file to import 515 * @param seqNum Bulk Load sequence number 516 * @return The destination {@link Path} of the bulk loaded file 517 * @throws IOException 518 */ 519 Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum) 520 throws IOException { 521 // Copy the file if it's on another filesystem 522 FileSystem srcFs = srcPath.getFileSystem(conf); 523 srcPath = srcFs.resolvePath(srcPath); 524 FileSystem realSrcFs = srcPath.getFileSystem(conf); 525 FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs; 526 527 // We can't compare FileSystem instances as equals() includes UGI instance 528 // as part of the comparison and won't work when doing SecureBulkLoad 529 // TODO deal with viewFS 530 if (!FSHDFSUtils.isSameHdfs(conf, realSrcFs, desFs)) { 531 LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " + 532 "the destination store. Copying file over to destination filesystem."); 533 Path tmpPath = createTempName(); 534 FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf); 535 LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath); 536 srcPath = tmpPath; 537 } 538 539 return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true)); 540 } 541 542 // =========================================================================== 543 // Splits Helpers 544 // =========================================================================== 545 /** @return {@link Path} to the temp directory used during split operations */ 546 Path getSplitsDir() { 547 return new Path(getRegionDir(), REGION_SPLITS_DIR); 548 } 549 550 public Path getSplitsDir(final RegionInfo hri) { 551 return new Path(getSplitsDir(), hri.getEncodedName()); 552 } 553 554 /** 555 * Clean up any split detritus that may have been left around from previous split attempts. 556 */ 557 void cleanupSplitsDir() throws IOException { 558 deleteDir(getSplitsDir()); 559 } 560 561 /** 562 * Clean up any split detritus that may have been left around from previous 563 * split attempts. 564 * Call this method on initial region deploy. 565 * @throws IOException 566 */ 567 void cleanupAnySplitDetritus() throws IOException { 568 Path splitdir = this.getSplitsDir(); 569 if (!fs.exists(splitdir)) return; 570 // Look at the splitdir. It could have the encoded names of the daughter 571 // regions we tried to make. See if the daughter regions actually got made 572 // out under the tabledir. If here under splitdir still, then the split did 573 // not complete. Try and do cleanup. This code WILL NOT catch the case 574 // where we successfully created daughter a but regionserver crashed during 575 // the creation of region b. In this case, there'll be an orphan daughter 576 // dir in the filesystem. TOOD: Fix. 577 FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs)); 578 if (daughters != null) { 579 for (FileStatus daughter: daughters) { 580 Path daughterDir = new Path(getTableDir(), daughter.getPath().getName()); 581 if (fs.exists(daughterDir) && !deleteDir(daughterDir)) { 582 throw new IOException("Failed delete of " + daughterDir); 583 } 584 } 585 } 586 cleanupSplitsDir(); 587 LOG.info("Cleaned up old failed split transaction detritus: " + splitdir); 588 } 589 590 /** 591 * Remove daughter region 592 * @param regionInfo daughter {@link RegionInfo} 593 * @throws IOException 594 */ 595 void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException { 596 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 597 if (this.fs.exists(regionDir) && !deleteDir(regionDir)) { 598 throw new IOException("Failed delete of " + regionDir); 599 } 600 } 601 602 /** 603 * Commit a daughter region, moving it from the split temporary directory 604 * to the proper location in the filesystem. 605 * 606 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo} 607 * @throws IOException 608 */ 609 public Path commitDaughterRegion(final RegionInfo regionInfo) 610 throws IOException { 611 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 612 Path daughterTmpDir = this.getSplitsDir(regionInfo); 613 614 if (fs.exists(daughterTmpDir)) { 615 616 // Write HRI to a file in case we need to recover hbase:meta 617 Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE); 618 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 619 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 620 621 // Move the daughter temp dir to the table dir 622 if (!rename(daughterTmpDir, regionDir)) { 623 throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir); 624 } 625 } 626 627 return regionDir; 628 } 629 630 /** 631 * Create the region splits directory. 632 */ 633 public void createSplitsDir() throws IOException { 634 Path splitdir = getSplitsDir(); 635 if (fs.exists(splitdir)) { 636 LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it"); 637 if (!deleteDir(splitdir)) { 638 throw new IOException("Failed deletion of " + splitdir 639 + " before creating them again."); 640 } 641 } 642 // splitDir doesn't exists now. No need to do an exists() call for it. 643 if (!createDir(splitdir)) { 644 throw new IOException("Failed create of " + splitdir); 645 } 646 } 647 648 /** 649 * Write out a split reference. Package local so it doesnt leak out of 650 * regionserver. 651 * @param hri {@link RegionInfo} of the destination 652 * @param familyName Column Family Name 653 * @param f File to split. 654 * @param splitRow Split Row 655 * @param top True if we are referring to the top half of the hfile. 656 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if 657 * this method is invoked on the Master side, then the RegionSplitPolicy will 658 * NOT have a reference to a Region. 659 * @return Path to created reference. 660 * @throws IOException 661 */ 662 public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow, 663 boolean top, RegionSplitPolicy splitPolicy) throws IOException { 664 if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) { 665 // Check whether the split row lies in the range of the store file 666 // If it is outside the range, return directly. 667 f.initReader(); 668 try { 669 if (top) { 670 //check if larger than last key. 671 Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow); 672 Optional<Cell> lastKey = f.getLastKey(); 673 // If lastKey is null means storefile is empty. 674 if (!lastKey.isPresent()) { 675 return null; 676 } 677 if (f.getComparator().compare(splitKey, lastKey.get()) > 0) { 678 return null; 679 } 680 } else { 681 //check if smaller than first key 682 Cell splitKey = PrivateCellUtil.createLastOnRow(splitRow); 683 Optional<Cell> firstKey = f.getFirstKey(); 684 // If firstKey is null means storefile is empty. 685 if (!firstKey.isPresent()) { 686 return null; 687 } 688 if (f.getComparator().compare(splitKey, firstKey.get()) < 0) { 689 return null; 690 } 691 } 692 } finally { 693 f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true); 694 } 695 } 696 697 Path splitDir = new Path(getSplitsDir(hri), familyName); 698 // A reference to the bottom half of the hsf store file. 699 Reference r = 700 top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow); 701 // Add the referred-to regions name as a dot separated suffix. 702 // See REF_NAME_REGEX regex above. The referred-to regions name is 703 // up in the path of the passed in <code>f</code> -- parentdir is family, 704 // then the directory above is the region name. 705 String parentRegionName = regionInfoForFs.getEncodedName(); 706 // Write reference with same file id only with the other region name as 707 // suffix and into the new region location (under same family). 708 Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); 709 return r.write(fs, p); 710 } 711 712 // =========================================================================== 713 // Merge Helpers 714 // =========================================================================== 715 /** @return {@link Path} to the temp directory used during merge operations */ 716 public Path getMergesDir() { 717 return new Path(getRegionDir(), REGION_MERGES_DIR); 718 } 719 720 Path getMergesDir(final RegionInfo hri) { 721 return new Path(getMergesDir(), hri.getEncodedName()); 722 } 723 724 /** 725 * Clean up any merge detritus that may have been left around from previous merge attempts. 726 */ 727 void cleanupMergesDir() throws IOException { 728 deleteDir(getMergesDir()); 729 } 730 731 /** 732 * Remove merged region 733 * @param mergedRegion {@link RegionInfo} 734 * @throws IOException 735 */ 736 public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException { 737 Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName()); 738 if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) { 739 throw new IOException("Failed delete of " + regionDir); 740 } 741 } 742 743 static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException { 744 if (FSUtils.isDistributedFileSystem(fs) || 745 !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) { 746 return fs.mkdirs(dir); 747 } 748 FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 749 return fs.mkdirs(dir, perms); 750 } 751 752 /** 753 * Create the region merges directory. 754 * @throws IOException If merges dir already exists or we fail to create it. 755 * @see HRegionFileSystem#cleanupMergesDir() 756 */ 757 public void createMergesDir() throws IOException { 758 Path mergesdir = getMergesDir(); 759 if (fs.exists(mergesdir)) { 760 LOG.info("The " + mergesdir 761 + " directory exists. Hence deleting it to recreate it"); 762 if (!fs.delete(mergesdir, true)) { 763 throw new IOException("Failed deletion of " + mergesdir 764 + " before creating them again."); 765 } 766 } 767 if (!mkdirs(fs, conf, mergesdir)) 768 throw new IOException("Failed create of " + mergesdir); 769 } 770 771 /** 772 * Write out a merge reference under the given merges directory. Package local 773 * so it doesnt leak out of regionserver. 774 * @param mergedRegion {@link RegionInfo} of the merged region 775 * @param familyName Column Family Name 776 * @param f File to create reference. 777 * @param mergedDir 778 * @return Path to created reference. 779 * @throws IOException 780 */ 781 public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f, 782 Path mergedDir) throws IOException { 783 Path referenceDir = new Path(new Path(mergedDir, 784 mergedRegion.getEncodedName()), familyName); 785 // A whole reference to the store file. 786 Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); 787 // Add the referred-to regions name as a dot separated suffix. 788 // See REF_NAME_REGEX regex above. The referred-to regions name is 789 // up in the path of the passed in <code>f</code> -- parentdir is family, 790 // then the directory above is the region name. 791 String mergingRegionName = regionInfoForFs.getEncodedName(); 792 // Write reference with same file id only with the other region name as 793 // suffix and into the new region location (under same family). 794 Path p = new Path(referenceDir, f.getPath().getName() + "." 795 + mergingRegionName); 796 return r.write(fs, p); 797 } 798 799 /** 800 * Commit a merged region, moving it from the merges temporary directory to 801 * the proper location in the filesystem. 802 * @param mergedRegionInfo merged region {@link RegionInfo} 803 * @throws IOException 804 */ 805 public void commitMergedRegion(final RegionInfo mergedRegionInfo) throws IOException { 806 Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName()); 807 Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo); 808 // Move the tmp dir in the expected location 809 if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) { 810 if (!fs.rename(mergedRegionTmpDir, regionDir)) { 811 throw new IOException("Unable to rename " + mergedRegionTmpDir + " to " 812 + regionDir); 813 } 814 } 815 } 816 817 // =========================================================================== 818 // Create/Open/Delete Helpers 819 // =========================================================================== 820 /** 821 * Log the current state of the region 822 * @param LOG log to output information 823 * @throws IOException if an unexpected exception occurs 824 */ 825 void logFileSystemState(final Logger LOG) throws IOException { 826 FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG); 827 } 828 829 /** 830 * @param hri 831 * @return Content of the file we write out to the filesystem under a region 832 * @throws IOException 833 */ 834 private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException { 835 return RegionInfo.toDelimitedByteArray(hri); 836 } 837 838 /** 839 * Create a {@link RegionInfo} from the serialized version on-disk. 840 * @param fs {@link FileSystem} that contains the Region Info file 841 * @param regionDir {@link Path} to the Region Directory that contains the Info file 842 * @return An {@link RegionInfo} instance gotten from the Region Info file. 843 * @throws IOException if an error occurred during file open/read operation. 844 */ 845 public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir) 846 throws IOException { 847 FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE)); 848 try { 849 return RegionInfo.parseFrom(in); 850 } finally { 851 in.close(); 852 } 853 } 854 855 /** 856 * Write the .regioninfo file on-disk. 857 */ 858 private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs, 859 final Path regionInfoFile, final byte[] content) throws IOException { 860 // First check to get the permissions 861 FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 862 // Write the RegionInfo file content 863 FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null); 864 try { 865 out.write(content); 866 } finally { 867 out.close(); 868 } 869 } 870 871 /** 872 * Write out an info file under the stored region directory. Useful recovering mangled regions. 873 * If the regionInfo already exists on-disk, then we fast exit. 874 */ 875 void checkRegionInfoOnFilesystem() throws IOException { 876 // Compose the content of the file so we can compare to length in filesystem. If not same, 877 // rewrite it (it may have been written in the old format using Writables instead of pb). The 878 // pb version is much shorter -- we write now w/o the toString version -- so checking length 879 // only should be sufficient. I don't want to read the file every time to check if it pb 880 // serialized. 881 byte[] content = getRegionInfoFileContent(regionInfoForFs); 882 883 // Verify if the region directory exists before opening a region. We need to do this since if 884 // the region directory doesn't exist we will re-create the region directory and a new HRI 885 // when HRegion.openHRegion() is called. 886 try { 887 FileStatus status = fs.getFileStatus(getRegionDir()); 888 } catch (FileNotFoundException e) { 889 LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() + 890 " on table " + regionInfo.getTable()); 891 } 892 893 try { 894 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 895 FileStatus status = fs.getFileStatus(regionInfoFile); 896 if (status != null && status.getLen() == content.length) { 897 // Then assume the content good and move on. 898 // NOTE: that the length is not sufficient to define the the content matches. 899 return; 900 } 901 902 LOG.info("Rewriting .regioninfo file at: " + regionInfoFile); 903 if (!fs.delete(regionInfoFile, false)) { 904 throw new IOException("Unable to remove existing " + regionInfoFile); 905 } 906 } catch (FileNotFoundException e) { 907 LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() + 908 " on table " + regionInfo.getTable()); 909 } 910 911 // Write HRI to a file in case we need to recover hbase:meta 912 writeRegionInfoOnFilesystem(content, true); 913 } 914 915 /** 916 * Write out an info file under the region directory. Useful recovering mangled regions. 917 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 918 */ 919 private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException { 920 byte[] content = getRegionInfoFileContent(regionInfoForFs); 921 writeRegionInfoOnFilesystem(content, useTempDir); 922 } 923 924 /** 925 * Write out an info file under the region directory. Useful recovering mangled regions. 926 * @param regionInfoContent serialized version of the {@link RegionInfo} 927 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 928 */ 929 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, 930 final boolean useTempDir) throws IOException { 931 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 932 if (useTempDir) { 933 // Create in tmpDir and then move into place in case we crash after 934 // create but before close. If we don't successfully close the file, 935 // subsequent region reopens will fail the below because create is 936 // registered in NN. 937 938 // And then create the file 939 Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE); 940 941 // If datanode crashes or if the RS goes down just before the close is called while trying to 942 // close the created regioninfo file in the .tmp directory then on next 943 // creation we will be getting AlreadyCreatedException. 944 // Hence delete and create the file if exists. 945 if (FSUtils.isExists(fs, tmpPath)) { 946 FSUtils.delete(fs, tmpPath, true); 947 } 948 949 // Write HRI to a file in case we need to recover hbase:meta 950 writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent); 951 952 // Move the created file to the original path 953 if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) { 954 throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile); 955 } 956 } else { 957 // Write HRI to a file in case we need to recover hbase:meta 958 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 959 } 960 } 961 962 /** 963 * Create a new Region on file-system. 964 * @param conf the {@link Configuration} to use 965 * @param fs {@link FileSystem} from which to add the region 966 * @param tableDir {@link Path} to where the table is being stored 967 * @param regionInfo {@link RegionInfo} for region to be added 968 * @throws IOException if the region creation fails due to a FileSystem exception. 969 */ 970 public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf, 971 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 972 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 973 974 // We only create a .regioninfo and the region directory if this is the default region replica 975 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 976 Path regionDir = regionFs.getRegionDir(); 977 if (fs.exists(regionDir)) { 978 LOG.warn("Trying to create a region that already exists on disk: " + regionDir); 979 throw new IOException("The specified region already exists on disk: " + regionDir); 980 } 981 982 // Create the region directory 983 if (!createDirOnFileSystem(fs, conf, regionDir)) { 984 LOG.warn("Unable to create the region directory: " + regionDir); 985 throw new IOException("Unable to create region directory: " + regionDir); 986 } 987 988 // Write HRI to a file in case we need to recover hbase:meta 989 regionFs.writeRegionInfoOnFilesystem(false); 990 } else { 991 if (LOG.isDebugEnabled()) 992 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 993 } 994 return regionFs; 995 } 996 997 /** 998 * Open Region from file-system. 999 * @param conf the {@link Configuration} to use 1000 * @param fs {@link FileSystem} from which to add the region 1001 * @param tableDir {@link Path} to where the table is being stored 1002 * @param regionInfo {@link RegionInfo} for region to be added 1003 * @param readOnly True if you don't want to edit the region data 1004 * @throws IOException if the region creation fails due to a FileSystem exception. 1005 */ 1006 public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf, 1007 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly) 1008 throws IOException { 1009 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 1010 Path regionDir = regionFs.getRegionDir(); 1011 1012 if (!fs.exists(regionDir)) { 1013 LOG.warn("Trying to open a region that do not exists on disk: " + regionDir); 1014 throw new IOException("The specified region do not exists on disk: " + regionDir); 1015 } 1016 1017 if (!readOnly) { 1018 // Cleanup temporary directories 1019 regionFs.cleanupTempDir(); 1020 regionFs.cleanupSplitsDir(); 1021 regionFs.cleanupMergesDir(); 1022 1023 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta 1024 // Only create HRI if we are the default replica 1025 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 1026 regionFs.checkRegionInfoOnFilesystem(); 1027 } else { 1028 if (LOG.isDebugEnabled()) { 1029 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 1030 } 1031 } 1032 } 1033 1034 return regionFs; 1035 } 1036 1037 /** 1038 * Remove the region from the table directory, archiving the region's hfiles. 1039 * @param conf the {@link Configuration} to use 1040 * @param fs {@link FileSystem} from which to remove the region 1041 * @param tableDir {@link Path} to where the table is being stored 1042 * @param regionInfo {@link RegionInfo} for region to be deleted 1043 * @throws IOException if the request cannot be completed 1044 */ 1045 public static void deleteRegionFromFileSystem(final Configuration conf, 1046 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 1047 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 1048 Path regionDir = regionFs.getRegionDir(); 1049 1050 if (!fs.exists(regionDir)) { 1051 LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir); 1052 return; 1053 } 1054 1055 if (LOG.isDebugEnabled()) { 1056 LOG.debug("DELETING region " + regionDir); 1057 } 1058 1059 // Archive region 1060 Path rootDir = FSUtils.getRootDir(conf); 1061 HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir); 1062 1063 // Delete empty region dir 1064 if (!fs.delete(regionDir, true)) { 1065 LOG.warn("Failed delete of " + regionDir); 1066 } 1067 } 1068 1069 /** 1070 * Creates a directory. Assumes the user has already checked for this directory existence. 1071 * @param dir 1072 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1073 * whether the directory exists or not, and returns true if it exists. 1074 * @throws IOException 1075 */ 1076 boolean createDir(Path dir) throws IOException { 1077 int i = 0; 1078 IOException lastIOE = null; 1079 do { 1080 try { 1081 return mkdirs(fs, conf, dir); 1082 } catch (IOException ioe) { 1083 lastIOE = ioe; 1084 if (fs.exists(dir)) return true; // directory is present 1085 try { 1086 sleepBeforeRetry("Create Directory", i+1); 1087 } catch (InterruptedException e) { 1088 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1089 } 1090 } 1091 } while (++i <= hdfsClientRetriesNumber); 1092 throw new IOException("Exception in createDir", lastIOE); 1093 } 1094 1095 /** 1096 * Renames a directory. Assumes the user has already checked for this directory existence. 1097 * @param srcpath 1098 * @param dstPath 1099 * @return true if rename is successful. 1100 * @throws IOException 1101 */ 1102 boolean rename(Path srcpath, Path dstPath) throws IOException { 1103 IOException lastIOE = null; 1104 int i = 0; 1105 do { 1106 try { 1107 return fs.rename(srcpath, dstPath); 1108 } catch (IOException ioe) { 1109 lastIOE = ioe; 1110 if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move 1111 // dir is not there, retry after some time. 1112 try { 1113 sleepBeforeRetry("Rename Directory", i+1); 1114 } catch (InterruptedException e) { 1115 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1116 } 1117 } 1118 } while (++i <= hdfsClientRetriesNumber); 1119 1120 throw new IOException("Exception in rename", lastIOE); 1121 } 1122 1123 /** 1124 * Deletes a directory. Assumes the user has already checked for this directory existence. 1125 * @param dir 1126 * @return true if the directory is deleted. 1127 * @throws IOException 1128 */ 1129 boolean deleteDir(Path dir) throws IOException { 1130 IOException lastIOE = null; 1131 int i = 0; 1132 do { 1133 try { 1134 return fs.delete(dir, true); 1135 } catch (IOException ioe) { 1136 lastIOE = ioe; 1137 if (!fs.exists(dir)) return true; 1138 // dir is there, retry deleting after some time. 1139 try { 1140 sleepBeforeRetry("Delete Directory", i+1); 1141 } catch (InterruptedException e) { 1142 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1143 } 1144 } 1145 } while (++i <= hdfsClientRetriesNumber); 1146 1147 throw new IOException("Exception in DeleteDir", lastIOE); 1148 } 1149 1150 /** 1151 * sleeping logic; handles the interrupt exception. 1152 */ 1153 private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException { 1154 sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1155 } 1156 1157 /** 1158 * Creates a directory for a filesystem and configuration object. Assumes the user has already 1159 * checked for this directory existence. 1160 * @param fs 1161 * @param conf 1162 * @param dir 1163 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1164 * whether the directory exists or not, and returns true if it exists. 1165 * @throws IOException 1166 */ 1167 private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir) 1168 throws IOException { 1169 int i = 0; 1170 IOException lastIOE = null; 1171 int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number", 1172 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 1173 int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries", 1174 DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 1175 do { 1176 try { 1177 return fs.mkdirs(dir); 1178 } catch (IOException ioe) { 1179 lastIOE = ioe; 1180 if (fs.exists(dir)) return true; // directory is present 1181 try { 1182 sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1183 } catch (InterruptedException e) { 1184 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1185 } 1186 } 1187 } while (++i <= hdfsClientRetriesNumber); 1188 1189 throw new IOException("Exception in createDir", lastIOE); 1190 } 1191 1192 /** 1193 * sleeping logic for static methods; handles the interrupt exception. Keeping a static version 1194 * for this to avoid re-looking for the integer values. 1195 */ 1196 private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries, 1197 int hdfsClientRetriesNumber) throws InterruptedException { 1198 if (sleepMultiplier > hdfsClientRetriesNumber) { 1199 if (LOG.isDebugEnabled()) { 1200 LOG.debug(msg + ", retries exhausted"); 1201 } 1202 return; 1203 } 1204 if (LOG.isDebugEnabled()) { 1205 LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier); 1206 } 1207 Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier); 1208 } 1209}