001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.io.InterruptedIOException;
023import java.util.ArrayList;
024import java.util.Collection;
025import java.util.List;
026import java.util.Objects;
027import java.util.Optional;
028import java.util.UUID;
029import org.apache.hadoop.conf.Configuration;
030import org.apache.hadoop.fs.FSDataInputStream;
031import org.apache.hadoop.fs.FSDataOutputStream;
032import org.apache.hadoop.fs.FileStatus;
033import org.apache.hadoop.fs.FileSystem;
034import org.apache.hadoop.fs.FileUtil;
035import org.apache.hadoop.fs.LocatedFileStatus;
036import org.apache.hadoop.fs.Path;
037import org.apache.hadoop.fs.permission.FsPermission;
038import org.apache.hadoop.hbase.Cell;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.PrivateCellUtil;
041import org.apache.hadoop.hbase.backup.HFileArchiver;
042import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.TableDescriptor;
045import org.apache.hadoop.hbase.fs.HFileSystem;
046import org.apache.hadoop.hbase.io.Reference;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.FSHDFSUtils;
049import org.apache.hadoop.hbase.util.FSUtils;
050import org.apache.hadoop.hbase.util.Pair;
051import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
052import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
053import org.apache.yetus.audience.InterfaceAudience;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
057
058import edu.umd.cs.findbugs.annotations.Nullable;
059
060/**
061 * View to an on-disk Region.
062 * Provides the set of methods necessary to interact with the on-disk region data.
063 */
064@InterfaceAudience.Private
065public class HRegionFileSystem {
  /** Logger shared by all instances of this class. */
  private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class);

  /** Name of the region info file that resides just under the region directory. */
  public final static String REGION_INFO_FILE = ".regioninfo";

  /** Temporary subdirectory of the region directory used for merges. */
  public static final String REGION_MERGES_DIR = ".merges";

  /** Temporary subdirectory of the region directory used for splits. */
  public static final String REGION_SPLITS_DIR = ".splits";

  /**
   * Temporary subdirectory of the region directory used for compaction output. It is also the
   * parent of every path handed out by {@code createTempName()}.
   */
  @VisibleForTesting static final String REGION_TEMP_DIR = ".tmp";

  /** The region this view was created for, exactly as passed to the constructor. */
  private final RegionInfo regionInfo;
  // RegionInfo used for interacting with the FS (getting the encoded name, etc.); derived from
  // regionInfo through ServerRegionReplicaUtil.getRegionInfoForFs().
  private final RegionInfo regionInfoForFs;
  /** Configuration passed to the constructor. */
  private final Configuration conf;
  /** Directory of the table that owns the region; never null. */
  private final Path tableDir;
  /** Filesystem that contains the region. */
  private final FileSystem fs;
  /** Directory of the region, computed by FSUtils.getRegionDir(tableDir, regionInfo). */
  private final Path regionDir;

  /**
   * In order to handle NameNode connectivity hiccups, non-idempotent operations need to be
   * retried at the client level. The two values below are read in the constructor from
   * {@code hdfs.client.retries.number} and {@code hdfs.client.sleep.before.retries}.
   * NOTE(review): the code consuming them is not visible in this part of the file; presumably
   * the createDir()/rename()/deleteDir() helpers -- confirm.
   */
  private final int hdfsClientRetriesNumber;
  // Presumably milliseconds (default 1000) -- TODO confirm against the retry helper.
  private final int baseSleepBeforeRetries;
  /** Fallback for {@code hdfs.client.retries.number}. */
  private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
  /** Fallback for {@code hdfs.client.sleep.before.retries}. */
  private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
096
097  /**
098   * Create a view to the on-disk region
099   * @param conf the {@link Configuration} to use
100   * @param fs {@link FileSystem} that contains the region
101   * @param tableDir {@link Path} to where the table is being stored
102   * @param regionInfo {@link RegionInfo} for region
103   */
104  HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
105      final RegionInfo regionInfo) {
106    this.fs = fs;
107    this.conf = conf;
108    this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null");
109    this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null");
110    this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
111    this.regionDir = FSUtils.getRegionDir(tableDir, regionInfo);
112    this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
113      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
114    this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
115      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
116 }
117
118  /** @return the underlying {@link FileSystem} */
119  public FileSystem getFileSystem() {
120    return this.fs;
121  }
122
123  /** @return the {@link RegionInfo} that describe this on-disk region view */
124  public RegionInfo getRegionInfo() {
125    return this.regionInfo;
126  }
127
128  public RegionInfo getRegionInfoForFS() {
129    return this.regionInfoForFs;
130  }
131
132  /** @return {@link Path} to the region's root directory. */
133  public Path getTableDir() {
134    return this.tableDir;
135  }
136
137  /** @return {@link Path} to the region directory. */
138  public Path getRegionDir() {
139    return regionDir;
140  }
141
142  // ===========================================================================
143  //  Temp Helpers
144  // ===========================================================================
145  /** @return {@link Path} to the region's temp directory, used for file creations */
146  Path getTempDir() {
147    return new Path(getRegionDir(), REGION_TEMP_DIR);
148  }
149
150  /**
151   * Clean up any temp detritus that may have been left around from previous operation attempts.
152   */
153  void cleanupTempDir() throws IOException {
154    deleteDir(getTempDir());
155  }
156
157  // ===========================================================================
158  //  Store/StoreFile Helpers
159  // ===========================================================================
160  /**
161   * Returns the directory path of the specified family
162   * @param familyName Column Family Name
163   * @return {@link Path} to the directory of the specified family
164   */
165  public Path getStoreDir(final String familyName) {
166    return new Path(this.getRegionDir(), familyName);
167  }
168
169  /**
170   * Create the store directory for the specified family name
171   * @param familyName Column Family Name
172   * @return {@link Path} to the directory of the specified family
173   * @throws IOException if the directory creation fails.
174   */
175  Path createStoreDir(final String familyName) throws IOException {
176    Path storeDir = getStoreDir(familyName);
177    if(!fs.exists(storeDir) && !createDir(storeDir))
178      throw new IOException("Failed creating "+storeDir);
179    return storeDir;
180  }
181
182  /**
183   * Set the directory of CF to the specified storage policy. <br>
184   * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
185   * <i>"COLD"</i> <br>
186   * <br>
187   * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
188   * @param familyName The name of column family.
189   * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
190   * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
191   * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
192   */
193  public void setStoragePolicy(String familyName, String policyName) {
194    FSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName);
195  }
196
197  /**
198   * Get the storage policy of the directory of CF.
199   * @param familyName The name of column family.
200   * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
201   *         thrown when trying to get policy
202   */
203  @Nullable
204  public String getStoragePolicyName(String familyName) {
205    if (this.fs instanceof HFileSystem) {
206      Path storeDir = getStoreDir(familyName);
207      return ((HFileSystem) this.fs).getStoragePolicyName(storeDir);
208    }
209
210    return null;
211  }
212
213  /**
214   * Returns the store files available for the family.
215   * This methods performs the filtering based on the valid store files.
216   * @param familyName Column Family Name
217   * @return a set of {@link StoreFileInfo} for the specified family.
218   */
219  public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
220    return getStoreFiles(Bytes.toString(familyName));
221  }
222
223  public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
224    return getStoreFiles(familyName, true);
225  }
226
227  /**
228   * Returns the store files available for the family.
229   * This methods performs the filtering based on the valid store files.
230   * @param familyName Column Family Name
231   * @return a set of {@link StoreFileInfo} for the specified family.
232   */
233  public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
234      throws IOException {
235    Path familyDir = getStoreDir(familyName);
236    FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
237    if (files == null) {
238      if (LOG.isTraceEnabled()) {
239        LOG.trace("No StoreFiles for: " + familyDir);
240      }
241      return null;
242    }
243
244    ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length);
245    for (FileStatus status: files) {
246      if (validate && !StoreFileInfo.isValid(status)) {
247        LOG.warn("Invalid StoreFile: " + status.getPath());
248        continue;
249      }
250      StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
251        regionInfoForFs, familyName, status.getPath());
252      storeFiles.add(info);
253
254    }
255    return storeFiles;
256  }
257
258  /**
259   * Returns the store files' LocatedFileStatus which available for the family.
260   * This methods performs the filtering based on the valid store files.
261   * @param familyName Column Family Name
262   * @return a list of store files' LocatedFileStatus for the specified family.
263   */
264  public static List<LocatedFileStatus> getStoreFilesLocatedStatus(
265      final HRegionFileSystem regionfs, final String familyName,
266      final boolean validate) throws IOException {
267    Path familyDir = regionfs.getStoreDir(familyName);
268    List<LocatedFileStatus> locatedFileStatuses = FSUtils.listLocatedStatus(
269        regionfs.getFileSystem(), familyDir);
270    if (locatedFileStatuses == null) {
271      if (LOG.isTraceEnabled()) {
272        LOG.trace("No StoreFiles for: " + familyDir);
273      }
274      return null;
275    }
276
277    List<LocatedFileStatus> validStoreFiles = Lists.newArrayList();
278    for (LocatedFileStatus status : locatedFileStatuses) {
279      if (validate && !StoreFileInfo.isValid(status)) {
280        LOG.warn("Invalid StoreFile: " + status.getPath());
281      } else {
282        validStoreFiles.add(status);
283      }
284    }
285    return validStoreFiles;
286  }
287
288  /**
289   * Return Qualified Path of the specified family/file
290   *
291   * @param familyName Column Family Name
292   * @param fileName File Name
293   * @return The qualified Path for the specified family/file
294   */
295  Path getStoreFilePath(final String familyName, final String fileName) {
296    Path familyDir = getStoreDir(familyName);
297    return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory());
298  }
299
300  /**
301   * Return the store file information of the specified family/file.
302   *
303   * @param familyName Column Family Name
304   * @param fileName File Name
305   * @return The {@link StoreFileInfo} for the specified family/file
306   */
307  StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
308      throws IOException {
309    Path familyDir = getStoreDir(familyName);
310    return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
311      regionInfoForFs, familyName, new Path(familyDir, fileName));
312  }
313
314  /**
315   * Returns true if the specified family has reference files
316   * @param familyName Column Family Name
317   * @return true if family contains reference files
318   * @throws IOException
319   */
320  public boolean hasReferences(final String familyName) throws IOException {
321    Path storeDir = getStoreDir(familyName);
322    FileStatus[] files = FSUtils.listStatus(fs, storeDir);
323    if (files != null) {
324      for(FileStatus stat: files) {
325        if(stat.isDirectory()) {
326          continue;
327        }
328        if(StoreFileInfo.isReference(stat.getPath())) {
329          if (LOG.isTraceEnabled()) LOG.trace("Reference " + stat.getPath());
330          return true;
331        }
332      }
333    }
334    return false;
335  }
336
337  /**
338   * Check whether region has Reference file
339   * @param htd table desciptor of the region
340   * @return true if region has reference file
341   * @throws IOException
342   */
343  public boolean hasReferences(final TableDescriptor htd) throws IOException {
344    for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) {
345      if (hasReferences(family.getNameAsString())) {
346        return true;
347      }
348    }
349    return false;
350  }
351
352  /**
353   * @return the set of families present on disk
354   * @throws IOException
355   */
356  public Collection<String> getFamilies() throws IOException {
357    FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
358    if (fds == null) return null;
359
360    ArrayList<String> families = new ArrayList<>(fds.length);
361    for (FileStatus status: fds) {
362      families.add(status.getPath().getName());
363    }
364
365    return families;
366  }
367
368  /**
369   * Remove the region family from disk, archiving the store files.
370   * @param familyName Column Family Name
371   * @throws IOException if an error occours during the archiving
372   */
373  public void deleteFamily(final String familyName) throws IOException {
374    // archive family store files
375    HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
376
377    // delete the family folder
378    Path familyDir = getStoreDir(familyName);
379    if(fs.exists(familyDir) && !deleteDir(familyDir))
380      throw new IOException("Could not delete family " + familyName
381          + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
382          + regionInfoForFs.getEncodedName() + ")");
383  }
384
385  /**
386   * Generate a unique file name, used by createTempName() and commitStoreFile()
387   * @param suffix extra information to append to the generated name
388   * @return Unique file name
389   */
390  private static String generateUniqueName(final String suffix) {
391    String name = UUID.randomUUID().toString().replaceAll("-", "");
392    if (suffix != null) name += suffix;
393    return name;
394  }
395
396  /**
397   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
398   * to get a safer file creation.
399   * <code>
400   * Path file = fs.createTempName();
401   * ...StoreFile.Writer(file)...
402   * fs.commitStoreFile("family", file);
403   * </code>
404   *
405   * @return Unique {@link Path} of the temporary file
406   */
407  public Path createTempName() {
408    return createTempName(null);
409  }
410
411  /**
412   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
413   * to get a safer file creation.
414   * <code>
415   * Path file = fs.createTempName();
416   * ...StoreFile.Writer(file)...
417   * fs.commitStoreFile("family", file);
418   * </code>
419   *
420   * @param suffix extra information to append to the generated name
421   * @return Unique {@link Path} of the temporary file
422   */
423  public Path createTempName(final String suffix) {
424    return new Path(getTempDir(), generateUniqueName(suffix));
425  }
426
427  /**
428   * Move the file from a build/temp location to the main family store directory.
429   * @param familyName Family that will gain the file
430   * @param buildPath {@link Path} to the file to commit.
431   * @return The new {@link Path} of the committed file
432   * @throws IOException
433   */
434  public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
435    Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
436    return commitStoreFile(buildPath, dstPath);
437  }
438
439  /**
440   * Generate the filename in the main family store directory for moving the file from a build/temp
441   *  location.
442   * @param familyName Family that will gain the file
443   * @param buildPath {@link Path} to the file to commit.
444   * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
445   * @param generateNewName False if you want to keep the buildPath name
446   * @return The new {@link Path} of the to be committed file
447   * @throws IOException
448   */
449  private Path preCommitStoreFile(final String familyName, final Path buildPath,
450      final long seqNum, final boolean generateNewName) throws IOException {
451    Path storeDir = getStoreDir(familyName);
452    if(!fs.exists(storeDir) && !createDir(storeDir))
453      throw new IOException("Failed creating " + storeDir);
454
455    String name = buildPath.getName();
456    if (generateNewName) {
457      name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
458    }
459    Path dstPath = new Path(storeDir, name);
460    if (!fs.exists(buildPath)) {
461      throw new FileNotFoundException(buildPath.toString());
462    }
463    if (LOG.isDebugEnabled()) {
464      LOG.debug("Committing " + buildPath + " as " + dstPath);
465    }
466    return dstPath;
467  }
468
469  /*
470   * Moves file from staging dir to region dir
471   * @param buildPath {@link Path} to the file to commit.
472   * @param dstPath {@link Path} to the file under region dir
473   * @return The {@link Path} of the committed file
474   * @throws IOException
475   */
476  Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
477    // buildPath exists, therefore not doing an exists() check.
478    if (!rename(buildPath, dstPath)) {
479      throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
480    }
481    return dstPath;
482  }
483
484  /**
485   * Archives the specified store file from the specified family.
486   * @param familyName Family that contains the store files
487   * @param filePath {@link Path} to the store file to remove
488   * @throws IOException if the archiving fails
489   */
490  public void removeStoreFile(final String familyName, final Path filePath)
491      throws IOException {
492    HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
493        this.tableDir, Bytes.toBytes(familyName), filePath);
494  }
495
496  /**
497   * Closes and archives the specified store files from the specified family.
498   * @param familyName Family that contains the store files
499   * @param storeFiles set of store files to remove
500   * @throws IOException if the archiving fails
501   */
502  public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles)
503      throws IOException {
504    HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
505        this.tableDir, Bytes.toBytes(familyName), storeFiles);
506  }
507
508  /**
509   * Bulk load: Add a specified store file to the specified family.
510   * If the source file is on the same different file-system is moved from the
511   * source location to the destination location, otherwise is copied over.
512   *
513   * @param familyName Family that will gain the file
514   * @param srcPath {@link Path} to the file to import
515   * @param seqNum Bulk Load sequence number
516   * @return The destination {@link Path} of the bulk loaded file
517   * @throws IOException
518   */
519  Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
520      throws IOException {
521    // Copy the file if it's on another filesystem
522    FileSystem srcFs = srcPath.getFileSystem(conf);
523    srcPath = srcFs.resolvePath(srcPath);
524    FileSystem realSrcFs = srcPath.getFileSystem(conf);
525    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
526
527    // We can't compare FileSystem instances as equals() includes UGI instance
528    // as part of the comparison and won't work when doing SecureBulkLoad
529    // TODO deal with viewFS
530    if (!FSHDFSUtils.isSameHdfs(conf, realSrcFs, desFs)) {
531      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
532          "the destination store. Copying file over to destination filesystem.");
533      Path tmpPath = createTempName();
534      FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf);
535      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
536      srcPath = tmpPath;
537    }
538
539    return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
540  }
541
542  // ===========================================================================
543  //  Splits Helpers
544  // ===========================================================================
545  /** @return {@link Path} to the temp directory used during split operations */
546  Path getSplitsDir() {
547    return new Path(getRegionDir(), REGION_SPLITS_DIR);
548  }
549
550  public Path getSplitsDir(final RegionInfo hri) {
551    return new Path(getSplitsDir(), hri.getEncodedName());
552  }
553
554  /**
555   * Clean up any split detritus that may have been left around from previous split attempts.
556   */
557  void cleanupSplitsDir() throws IOException {
558    deleteDir(getSplitsDir());
559  }
560
561  /**
562   * Clean up any split detritus that may have been left around from previous
563   * split attempts.
564   * Call this method on initial region deploy.
565   * @throws IOException
566   */
567  void cleanupAnySplitDetritus() throws IOException {
568    Path splitdir = this.getSplitsDir();
569    if (!fs.exists(splitdir)) return;
570    // Look at the splitdir.  It could have the encoded names of the daughter
571    // regions we tried to make.  See if the daughter regions actually got made
572    // out under the tabledir.  If here under splitdir still, then the split did
573    // not complete.  Try and do cleanup.  This code WILL NOT catch the case
574    // where we successfully created daughter a but regionserver crashed during
575    // the creation of region b.  In this case, there'll be an orphan daughter
576    // dir in the filesystem.  TOOD: Fix.
577    FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
578    if (daughters != null) {
579      for (FileStatus daughter: daughters) {
580        Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
581        if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
582          throw new IOException("Failed delete of " + daughterDir);
583        }
584      }
585    }
586    cleanupSplitsDir();
587    LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
588  }
589
590  /**
591   * Remove daughter region
592   * @param regionInfo daughter {@link RegionInfo}
593   * @throws IOException
594   */
595  void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
596    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
597    if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
598      throw new IOException("Failed delete of " + regionDir);
599    }
600  }
601
602  /**
603   * Commit a daughter region, moving it from the split temporary directory
604   * to the proper location in the filesystem.
605   *
606   * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
607   * @throws IOException
608   */
609  public Path commitDaughterRegion(final RegionInfo regionInfo)
610      throws IOException {
611    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
612    Path daughterTmpDir = this.getSplitsDir(regionInfo);
613
614    if (fs.exists(daughterTmpDir)) {
615
616      // Write HRI to a file in case we need to recover hbase:meta
617      Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
618      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
619      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
620
621      // Move the daughter temp dir to the table dir
622      if (!rename(daughterTmpDir, regionDir)) {
623        throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
624      }
625    }
626
627    return regionDir;
628  }
629
630  /**
631   * Create the region splits directory.
632   */
633  public void createSplitsDir() throws IOException {
634    Path splitdir = getSplitsDir();
635    if (fs.exists(splitdir)) {
636      LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
637      if (!deleteDir(splitdir)) {
638        throw new IOException("Failed deletion of " + splitdir
639            + " before creating them again.");
640      }
641    }
642    // splitDir doesn't exists now. No need to do an exists() call for it.
643    if (!createDir(splitdir)) {
644      throw new IOException("Failed create of " + splitdir);
645    }
646  }
647
648  /**
649   * Write out a split reference. Package local so it doesnt leak out of
650   * regionserver.
651   * @param hri {@link RegionInfo} of the destination
652   * @param familyName Column Family Name
653   * @param f File to split.
654   * @param splitRow Split Row
655   * @param top True if we are referring to the top half of the hfile.
656   * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if
657   *                    this method is invoked on the Master side, then the RegionSplitPolicy will
658   *                    NOT have a reference to a Region.
659   * @return Path to created reference.
660   * @throws IOException
661   */
662  public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow,
663      boolean top, RegionSplitPolicy splitPolicy) throws IOException {
664    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
665      // Check whether the split row lies in the range of the store file
666      // If it is outside the range, return directly.
667      f.initReader();
668      try {
669        if (top) {
670          //check if larger than last key.
671          Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
672          Optional<Cell> lastKey = f.getLastKey();
673          // If lastKey is null means storefile is empty.
674          if (!lastKey.isPresent()) {
675            return null;
676          }
677          if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
678            return null;
679          }
680        } else {
681          //check if smaller than first key
682          Cell splitKey = PrivateCellUtil.createLastOnRow(splitRow);
683          Optional<Cell> firstKey = f.getFirstKey();
684          // If firstKey is null means storefile is empty.
685          if (!firstKey.isPresent()) {
686            return null;
687          }
688          if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
689            return null;
690          }
691        }
692      } finally {
693        f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
694      }
695    }
696
697    Path splitDir = new Path(getSplitsDir(hri), familyName);
698    // A reference to the bottom half of the hsf store file.
699    Reference r =
700      top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
701    // Add the referred-to regions name as a dot separated suffix.
702    // See REF_NAME_REGEX regex above.  The referred-to regions name is
703    // up in the path of the passed in <code>f</code> -- parentdir is family,
704    // then the directory above is the region name.
705    String parentRegionName = regionInfoForFs.getEncodedName();
706    // Write reference with same file id only with the other region name as
707    // suffix and into the new region location (under same family).
708    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
709    return r.write(fs, p);
710  }
711
712  // ===========================================================================
713  //  Merge Helpers
714  // ===========================================================================
715  /** @return {@link Path} to the temp directory used during merge operations */
716  public Path getMergesDir() {
717    return new Path(getRegionDir(), REGION_MERGES_DIR);
718  }
719
720  Path getMergesDir(final RegionInfo hri) {
721    return new Path(getMergesDir(), hri.getEncodedName());
722  }
723
724  /**
725   * Clean up any merge detritus that may have been left around from previous merge attempts.
726   */
727  void cleanupMergesDir() throws IOException {
728    deleteDir(getMergesDir());
729  }
730
731  /**
732   * Remove merged region
733   * @param mergedRegion {@link RegionInfo}
734   * @throws IOException
735   */
736  public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException {
737    Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
738    if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
739      throw new IOException("Failed delete of " + regionDir);
740    }
741  }
742
743  static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
744    if (FSUtils.isDistributedFileSystem(fs) ||
745        !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) {
746      return fs.mkdirs(dir);
747    }
748    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
749    return fs.mkdirs(dir, perms);
750  }
751
752  /**
753   * Create the region merges directory.
754   * @throws IOException If merges dir already exists or we fail to create it.
755   * @see HRegionFileSystem#cleanupMergesDir()
756   */
757  public void createMergesDir() throws IOException {
758    Path mergesdir = getMergesDir();
759    if (fs.exists(mergesdir)) {
760      LOG.info("The " + mergesdir
761          + " directory exists.  Hence deleting it to recreate it");
762      if (!fs.delete(mergesdir, true)) {
763        throw new IOException("Failed deletion of " + mergesdir
764            + " before creating them again.");
765      }
766    }
767    if (!mkdirs(fs, conf, mergesdir))
768      throw new IOException("Failed create of " + mergesdir);
769  }
770
771  /**
772   * Write out a merge reference under the given merges directory. Package local
773   * so it doesnt leak out of regionserver.
774   * @param mergedRegion {@link RegionInfo} of the merged region
775   * @param familyName Column Family Name
776   * @param f File to create reference.
777   * @param mergedDir
778   * @return Path to created reference.
779   * @throws IOException
780   */
781  public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f,
782      Path mergedDir) throws IOException {
783    Path referenceDir = new Path(new Path(mergedDir,
784        mergedRegion.getEncodedName()), familyName);
785    // A whole reference to the store file.
786    Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
787    // Add the referred-to regions name as a dot separated suffix.
788    // See REF_NAME_REGEX regex above. The referred-to regions name is
789    // up in the path of the passed in <code>f</code> -- parentdir is family,
790    // then the directory above is the region name.
791    String mergingRegionName = regionInfoForFs.getEncodedName();
792    // Write reference with same file id only with the other region name as
793    // suffix and into the new region location (under same family).
794    Path p = new Path(referenceDir, f.getPath().getName() + "."
795        + mergingRegionName);
796    return r.write(fs, p);
797  }
798
799  /**
800   * Commit a merged region, moving it from the merges temporary directory to
801   * the proper location in the filesystem.
802   * @param mergedRegionInfo merged region {@link RegionInfo}
803   * @throws IOException
804   */
805  public void commitMergedRegion(final RegionInfo mergedRegionInfo) throws IOException {
806    Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
807    Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
808    // Move the tmp dir in the expected location
809    if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
810      if (!fs.rename(mergedRegionTmpDir, regionDir)) {
811        throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
812            + regionDir);
813      }
814    }
815  }
816
817  // ===========================================================================
818  //  Create/Open/Delete Helpers
819  // ===========================================================================
820  /**
821   * Log the current state of the region
822   * @param LOG log to output information
823   * @throws IOException if an unexpected exception occurs
824   */
825  void logFileSystemState(final Logger LOG) throws IOException {
826    FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
827  }
828
829  /**
830   * @param hri
831   * @return Content of the file we write out to the filesystem under a region
832   * @throws IOException
833   */
834  private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException {
835    return RegionInfo.toDelimitedByteArray(hri);
836  }
837
838  /**
839   * Create a {@link RegionInfo} from the serialized version on-disk.
840   * @param fs {@link FileSystem} that contains the Region Info file
841   * @param regionDir {@link Path} to the Region Directory that contains the Info file
842   * @return An {@link RegionInfo} instance gotten from the Region Info file.
843   * @throws IOException if an error occurred during file open/read operation.
844   */
845  public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
846      throws IOException {
847    FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
848    try {
849      return RegionInfo.parseFrom(in);
850    } finally {
851      in.close();
852    }
853  }
854
855  /**
856   * Write the .regioninfo file on-disk.
857   */
858  private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
859      final Path regionInfoFile, final byte[] content) throws IOException {
860    // First check to get the permissions
861    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
862    // Write the RegionInfo file content
863    FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null);
864    try {
865      out.write(content);
866    } finally {
867      out.close();
868    }
869  }
870
871  /**
872   * Write out an info file under the stored region directory. Useful recovering mangled regions.
873   * If the regionInfo already exists on-disk, then we fast exit.
874   */
875  void checkRegionInfoOnFilesystem() throws IOException {
876    // Compose the content of the file so we can compare to length in filesystem. If not same,
877    // rewrite it (it may have been written in the old format using Writables instead of pb). The
878    // pb version is much shorter -- we write now w/o the toString version -- so checking length
879    // only should be sufficient. I don't want to read the file every time to check if it pb
880    // serialized.
881    byte[] content = getRegionInfoFileContent(regionInfoForFs);
882
883    // Verify if the region directory exists before opening a region. We need to do this since if
884    // the region directory doesn't exist we will re-create the region directory and a new HRI
885    // when HRegion.openHRegion() is called.
886    try {
887      FileStatus status = fs.getFileStatus(getRegionDir());
888    } catch (FileNotFoundException e) {
889      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
890          " on table " + regionInfo.getTable());
891    }
892
893    try {
894      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
895      FileStatus status = fs.getFileStatus(regionInfoFile);
896      if (status != null && status.getLen() == content.length) {
897        // Then assume the content good and move on.
898        // NOTE: that the length is not sufficient to define the the content matches.
899        return;
900      }
901
902      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
903      if (!fs.delete(regionInfoFile, false)) {
904        throw new IOException("Unable to remove existing " + regionInfoFile);
905      }
906    } catch (FileNotFoundException e) {
907      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
908          " on table " + regionInfo.getTable());
909    }
910
911    // Write HRI to a file in case we need to recover hbase:meta
912    writeRegionInfoOnFilesystem(content, true);
913  }
914
915  /**
916   * Write out an info file under the region directory. Useful recovering mangled regions.
917   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
918   */
919  private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
920    byte[] content = getRegionInfoFileContent(regionInfoForFs);
921    writeRegionInfoOnFilesystem(content, useTempDir);
922  }
923
924  /**
925   * Write out an info file under the region directory. Useful recovering mangled regions.
926   * @param regionInfoContent serialized version of the {@link RegionInfo}
927   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
928   */
929  private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
930      final boolean useTempDir) throws IOException {
931    Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
932    if (useTempDir) {
933      // Create in tmpDir and then move into place in case we crash after
934      // create but before close. If we don't successfully close the file,
935      // subsequent region reopens will fail the below because create is
936      // registered in NN.
937
938      // And then create the file
939      Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
940
941      // If datanode crashes or if the RS goes down just before the close is called while trying to
942      // close the created regioninfo file in the .tmp directory then on next
943      // creation we will be getting AlreadyCreatedException.
944      // Hence delete and create the file if exists.
945      if (FSUtils.isExists(fs, tmpPath)) {
946        FSUtils.delete(fs, tmpPath, true);
947      }
948
949      // Write HRI to a file in case we need to recover hbase:meta
950      writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
951
952      // Move the created file to the original path
953      if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
954        throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
955      }
956    } else {
957      // Write HRI to a file in case we need to recover hbase:meta
958      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
959    }
960  }
961
962  /**
963   * Create a new Region on file-system.
964   * @param conf the {@link Configuration} to use
965   * @param fs {@link FileSystem} from which to add the region
966   * @param tableDir {@link Path} to where the table is being stored
967   * @param regionInfo {@link RegionInfo} for region to be added
968   * @throws IOException if the region creation fails due to a FileSystem exception.
969   */
970  public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
971      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
972    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
973
974    // We only create a .regioninfo and the region directory if this is the default region replica
975    if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
976      Path regionDir = regionFs.getRegionDir();
977      if (fs.exists(regionDir)) {
978        LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
979        throw new IOException("The specified region already exists on disk: " + regionDir);
980      }
981
982      // Create the region directory
983      if (!createDirOnFileSystem(fs, conf, regionDir)) {
984        LOG.warn("Unable to create the region directory: " + regionDir);
985        throw new IOException("Unable to create region directory: " + regionDir);
986      }
987
988      // Write HRI to a file in case we need to recover hbase:meta
989      regionFs.writeRegionInfoOnFilesystem(false);
990    } else {
991      if (LOG.isDebugEnabled())
992        LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
993    }
994    return regionFs;
995  }
996
997  /**
998   * Open Region from file-system.
999   * @param conf the {@link Configuration} to use
1000   * @param fs {@link FileSystem} from which to add the region
1001   * @param tableDir {@link Path} to where the table is being stored
1002   * @param regionInfo {@link RegionInfo} for region to be added
1003   * @param readOnly True if you don't want to edit the region data
1004   * @throws IOException if the region creation fails due to a FileSystem exception.
1005   */
1006  public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
1007      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly)
1008      throws IOException {
1009    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1010    Path regionDir = regionFs.getRegionDir();
1011
1012    if (!fs.exists(regionDir)) {
1013      LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
1014      throw new IOException("The specified region do not exists on disk: " + regionDir);
1015    }
1016
1017    if (!readOnly) {
1018      // Cleanup temporary directories
1019      regionFs.cleanupTempDir();
1020      regionFs.cleanupSplitsDir();
1021      regionFs.cleanupMergesDir();
1022
1023      // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
1024      // Only create HRI if we are the default replica
1025      if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1026        regionFs.checkRegionInfoOnFilesystem();
1027      } else {
1028        if (LOG.isDebugEnabled()) {
1029          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1030        }
1031      }
1032    }
1033
1034    return regionFs;
1035  }
1036
1037  /**
1038   * Remove the region from the table directory, archiving the region's hfiles.
1039   * @param conf the {@link Configuration} to use
1040   * @param fs {@link FileSystem} from which to remove the region
1041   * @param tableDir {@link Path} to where the table is being stored
1042   * @param regionInfo {@link RegionInfo} for region to be deleted
1043   * @throws IOException if the request cannot be completed
1044   */
1045  public static void deleteRegionFromFileSystem(final Configuration conf,
1046      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
1047    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1048    Path regionDir = regionFs.getRegionDir();
1049
1050    if (!fs.exists(regionDir)) {
1051      LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
1052      return;
1053    }
1054
1055    if (LOG.isDebugEnabled()) {
1056      LOG.debug("DELETING region " + regionDir);
1057    }
1058
1059    // Archive region
1060    Path rootDir = FSUtils.getRootDir(conf);
1061    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
1062
1063    // Delete empty region dir
1064    if (!fs.delete(regionDir, true)) {
1065      LOG.warn("Failed delete of " + regionDir);
1066    }
1067  }
1068
1069  /**
1070   * Creates a directory. Assumes the user has already checked for this directory existence.
1071   * @param dir
1072   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1073   *         whether the directory exists or not, and returns true if it exists.
1074   * @throws IOException
1075   */
1076  boolean createDir(Path dir) throws IOException {
1077    int i = 0;
1078    IOException lastIOE = null;
1079    do {
1080      try {
1081        return mkdirs(fs, conf, dir);
1082      } catch (IOException ioe) {
1083        lastIOE = ioe;
1084        if (fs.exists(dir)) return true; // directory is present
1085        try {
1086          sleepBeforeRetry("Create Directory", i+1);
1087        } catch (InterruptedException e) {
1088          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1089        }
1090      }
1091    } while (++i <= hdfsClientRetriesNumber);
1092    throw new IOException("Exception in createDir", lastIOE);
1093  }
1094
1095  /**
1096   * Renames a directory. Assumes the user has already checked for this directory existence.
1097   * @param srcpath
1098   * @param dstPath
1099   * @return true if rename is successful.
1100   * @throws IOException
1101   */
1102  boolean rename(Path srcpath, Path dstPath) throws IOException {
1103    IOException lastIOE = null;
1104    int i = 0;
1105    do {
1106      try {
1107        return fs.rename(srcpath, dstPath);
1108      } catch (IOException ioe) {
1109        lastIOE = ioe;
1110        if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1111        // dir is not there, retry after some time.
1112        try {
1113          sleepBeforeRetry("Rename Directory", i+1);
1114        } catch (InterruptedException e) {
1115          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1116        }
1117      }
1118    } while (++i <= hdfsClientRetriesNumber);
1119
1120    throw new IOException("Exception in rename", lastIOE);
1121  }
1122
1123  /**
1124   * Deletes a directory. Assumes the user has already checked for this directory existence.
1125   * @param dir
1126   * @return true if the directory is deleted.
1127   * @throws IOException
1128   */
1129  boolean deleteDir(Path dir) throws IOException {
1130    IOException lastIOE = null;
1131    int i = 0;
1132    do {
1133      try {
1134        return fs.delete(dir, true);
1135      } catch (IOException ioe) {
1136        lastIOE = ioe;
1137        if (!fs.exists(dir)) return true;
1138        // dir is there, retry deleting after some time.
1139        try {
1140          sleepBeforeRetry("Delete Directory", i+1);
1141        } catch (InterruptedException e) {
1142          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1143        }
1144      }
1145    } while (++i <= hdfsClientRetriesNumber);
1146
1147    throw new IOException("Exception in DeleteDir", lastIOE);
1148  }
1149
1150  /**
1151   * sleeping logic; handles the interrupt exception.
1152   */
1153  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1154    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1155  }
1156
1157  /**
1158   * Creates a directory for a filesystem and configuration object. Assumes the user has already
1159   * checked for this directory existence.
1160   * @param fs
1161   * @param conf
1162   * @param dir
1163   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1164   *         whether the directory exists or not, and returns true if it exists.
1165   * @throws IOException
1166   */
1167  private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1168      throws IOException {
1169    int i = 0;
1170    IOException lastIOE = null;
1171    int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1172      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1173    int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1174      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1175    do {
1176      try {
1177        return fs.mkdirs(dir);
1178      } catch (IOException ioe) {
1179        lastIOE = ioe;
1180        if (fs.exists(dir)) return true; // directory is present
1181        try {
1182          sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1183        } catch (InterruptedException e) {
1184          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1185        }
1186      }
1187    } while (++i <= hdfsClientRetriesNumber);
1188
1189    throw new IOException("Exception in createDir", lastIOE);
1190  }
1191
1192  /**
1193   * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1194   * for this to avoid re-looking for the integer values.
1195   */
1196  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1197      int hdfsClientRetriesNumber) throws InterruptedException {
1198    if (sleepMultiplier > hdfsClientRetriesNumber) {
1199      if (LOG.isDebugEnabled()) {
1200        LOG.debug(msg + ", retries exhausted");
1201      }
1202      return;
1203    }
1204    if (LOG.isDebugEnabled()) {
1205      LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1206    }
1207    Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1208  }
1209}