001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.util;
020
021import java.io.FileNotFoundException;
022import java.io.IOException;
023import java.lang.reflect.InvocationTargetException;
024import java.lang.reflect.Method;
025import java.net.URI;
026import java.net.URISyntaxException;
027import java.util.List;
028import java.util.Locale;
029import java.util.Map;
030import java.util.concurrent.ConcurrentHashMap;
031
032import org.apache.hadoop.HadoopIllegalArgumentException;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.fs.FSDataOutputStream;
035import org.apache.hadoop.fs.FileStatus;
036import org.apache.hadoop.fs.FileSystem;
037import org.apache.hadoop.fs.LocatedFileStatus;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.fs.PathFilter;
040import org.apache.hadoop.fs.RemoteIterator;
041import org.apache.hadoop.fs.permission.FsPermission;
042import org.apache.hadoop.hbase.HConstants;
043import org.apache.hadoop.hbase.TableName;
044import org.apache.hadoop.ipc.RemoteException;
045import org.apache.yetus.audience.InterfaceAudience;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
050import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
051
052/**
053 * Utility methods for interacting with the underlying file system.
054 * <p/>
055 * Note that {@link #setStoragePolicy(FileSystem, Path, String)} is tested in TestFSUtils and
056 * pre-commit will run the hbase-server tests if there's code change in this class. See
057 * <a href="https://issues.apache.org/jira/browse/HBASE-20838">HBASE-20838</a> for more details.
058 */
059@InterfaceAudience.Private
060public abstract class CommonFSUtils {
061  private static final Logger LOG = LoggerFactory.getLogger(CommonFSUtils.class);
062
063  /** Parameter name for HBase WAL directory */
064  public static final String HBASE_WAL_DIR = "hbase.wal.dir";
065
066  /** Parameter to disable stream capability enforcement checks */
067  public static final String UNSAFE_STREAM_CAPABILITY_ENFORCE = "hbase.unsafe.stream.capability.enforce";
068
069  /** Full access permissions (starting point for a umask) */
070  public static final String FULL_RWX_PERMISSIONS = "777";
071
072  protected CommonFSUtils() {
073    super();
074  }
075
076  /**
077   * Compare of path component. Does not consider schema; i.e. if schemas
078   * different but <code>path</code> starts with <code>rootPath</code>,
079   * then the function returns true
080   * @param rootPath value to check for
081   * @param path subject to check
082   * @return True if <code>path</code> starts with <code>rootPath</code>
083   */
084  public static boolean isStartingWithPath(final Path rootPath, final String path) {
085    String uriRootPath = rootPath.toUri().getPath();
086    String tailUriPath = (new Path(path)).toUri().getPath();
087    return tailUriPath.startsWith(uriRootPath);
088  }
089
090  /**
091   * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the
092   * '/a/b/c' part. Does not consider schema; i.e. if schemas different but path or subpath matches,
093   * the two will equate.
094   * @param pathToSearch Path we will be trying to match against.
095   * @param pathTail what to match
096   * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
097   */
098  public static boolean isMatchingTail(final Path pathToSearch, String pathTail) {
099    return isMatchingTail(pathToSearch, new Path(pathTail));
100  }
101
102  /**
103   * Compare path component of the Path URI; e.g. if hdfs://a/b/c and /a/b/c, it will compare the
104   * '/a/b/c' part. If you passed in 'hdfs://a/b/c and b/c, it would return true.  Does not consider
105   * schema; i.e. if schemas different but path or subpath matches, the two will equate.
106   * @param pathToSearch Path we will be trying to match agains against
107   * @param pathTail what to match
108   * @return True if <code>pathTail</code> is tail on the path of <code>pathToSearch</code>
109   */
110  public static boolean isMatchingTail(final Path pathToSearch, final Path pathTail) {
111    if (pathToSearch.depth() != pathTail.depth()) {
112      return false;
113    }
114    Path tailPath = pathTail;
115    String tailName;
116    Path toSearch = pathToSearch;
117    String toSearchName;
118    boolean result = false;
119    do {
120      tailName = tailPath.getName();
121      if (tailName == null || tailName.length() <= 0) {
122        result = true;
123        break;
124      }
125      toSearchName = toSearch.getName();
126      if (toSearchName == null || toSearchName.length() <= 0) {
127        break;
128      }
129      // Move up a parent on each path for next go around.  Path doesn't let us go off the end.
130      tailPath = tailPath.getParent();
131      toSearch = toSearch.getParent();
132    } while(tailName.equals(toSearchName));
133    return result;
134  }
135
136  /**
137   * Delete if exists.
138   * @param fs filesystem object
139   * @param dir directory to delete
140   * @return True if deleted <code>dir</code>
141   * @throws IOException e
142   */
143  public static boolean deleteDirectory(final FileSystem fs, final Path dir)
144  throws IOException {
145    return fs.exists(dir) && fs.delete(dir, true);
146  }
147
148  /**
149   * Return the number of bytes that large input files should be optimally
150   * be split into to minimize i/o time.
151   *
152   * use reflection to search for getDefaultBlockSize(Path f)
153   * if the method doesn't exist, fall back to using getDefaultBlockSize()
154   *
155   * @param fs filesystem object
156   * @return the default block size for the path's filesystem
157   * @throws IOException e
158   */
159  public static long getDefaultBlockSize(final FileSystem fs, final Path path) throws IOException {
160    Method m = null;
161    Class<? extends FileSystem> cls = fs.getClass();
162    try {
163      m = cls.getMethod("getDefaultBlockSize", new Class<?>[] { Path.class });
164    } catch (NoSuchMethodException e) {
165      LOG.info("FileSystem doesn't support getDefaultBlockSize");
166    } catch (SecurityException e) {
167      LOG.info("Doesn't have access to getDefaultBlockSize on FileSystems", e);
168      m = null; // could happen on setAccessible()
169    }
170    if (m == null) {
171      return fs.getDefaultBlockSize(path);
172    } else {
173      try {
174        Object ret = m.invoke(fs, path);
175        return ((Long)ret).longValue();
176      } catch (Exception e) {
177        throw new IOException(e);
178      }
179    }
180  }
181
182  /*
183   * Get the default replication.
184   *
185   * use reflection to search for getDefaultReplication(Path f)
186   * if the method doesn't exist, fall back to using getDefaultReplication()
187   *
188   * @param fs filesystem object
189   * @param f path of file
190   * @return default replication for the path's filesystem
191   * @throws IOException e
192   */
193  public static short getDefaultReplication(final FileSystem fs, final Path path)
194      throws IOException {
195    Method m = null;
196    Class<? extends FileSystem> cls = fs.getClass();
197    try {
198      m = cls.getMethod("getDefaultReplication", new Class<?>[] { Path.class });
199    } catch (NoSuchMethodException e) {
200      LOG.info("FileSystem doesn't support getDefaultReplication");
201    } catch (SecurityException e) {
202      LOG.info("Doesn't have access to getDefaultReplication on FileSystems", e);
203      m = null; // could happen on setAccessible()
204    }
205    if (m == null) {
206      return fs.getDefaultReplication(path);
207    } else {
208      try {
209        Object ret = m.invoke(fs, path);
210        return ((Number)ret).shortValue();
211      } catch (Exception e) {
212        throw new IOException(e);
213      }
214    }
215  }
216
217  /**
218   * Returns the default buffer size to use during writes.
219   *
220   * The size of the buffer should probably be a multiple of hardware
221   * page size (4096 on Intel x86), and it determines how much data is
222   * buffered during read and write operations.
223   *
224   * @param fs filesystem object
225   * @return default buffer size to use during writes
226   */
227  public static int getDefaultBufferSize(final FileSystem fs) {
228    return fs.getConf().getInt("io.file.buffer.size", 4096);
229  }
230
231  /**
232   * Create the specified file on the filesystem. By default, this will:
233   * <ol>
234   * <li>apply the umask in the configuration (if it is enabled)</li>
235   * <li>use the fs configured buffer size (or 4096 if not set)</li>
236   * <li>use the default replication</li>
237   * <li>use the default block size</li>
238   * <li>not track progress</li>
239   * </ol>
240   *
241   * @param fs {@link FileSystem} on which to write the file
242   * @param path {@link Path} to the file to write
243   * @param perm intial permissions
244   * @param overwrite Whether or not the created file should be overwritten.
245   * @return output stream to the created file
246   * @throws IOException if the file cannot be created
247   */
248  public static FSDataOutputStream create(FileSystem fs, Path path,
249      FsPermission perm, boolean overwrite) throws IOException {
250    if (LOG.isTraceEnabled()) {
251      LOG.trace("Creating file=" + path + " with permission=" + perm + ", overwrite=" + overwrite);
252    }
253    return fs.create(path, perm, overwrite, getDefaultBufferSize(fs),
254        getDefaultReplication(fs, path), getDefaultBlockSize(fs, path), null);
255  }
256
257  /**
258   * Get the file permissions specified in the configuration, if they are
259   * enabled.
260   *
261   * @param fs filesystem that the file will be created on.
262   * @param conf configuration to read for determining if permissions are
263   *          enabled and which to use
264   * @param permssionConfKey property key in the configuration to use when
265   *          finding the permission
266   * @return the permission to use when creating a new file on the fs. If
267   *         special permissions are not specified in the configuration, then
268   *         the default permissions on the the fs will be returned.
269   */
270  public static FsPermission getFilePermissions(final FileSystem fs,
271      final Configuration conf, final String permssionConfKey) {
272    boolean enablePermissions = conf.getBoolean(
273        HConstants.ENABLE_DATA_FILE_UMASK, false);
274
275    if (enablePermissions) {
276      try {
277        FsPermission perm = new FsPermission(FULL_RWX_PERMISSIONS);
278        // make sure that we have a mask, if not, go default.
279        String mask = conf.get(permssionConfKey);
280        if (mask == null) {
281          return FsPermission.getFileDefault();
282        }
283        // appy the umask
284        FsPermission umask = new FsPermission(mask);
285        return perm.applyUMask(umask);
286      } catch (IllegalArgumentException e) {
287        LOG.warn(
288            "Incorrect umask attempted to be created: "
289                + conf.get(permssionConfKey)
290                + ", using default file permissions.", e);
291        return FsPermission.getFileDefault();
292      }
293    }
294    return FsPermission.getFileDefault();
295  }
296
297  /**
298   * Verifies root directory path is a valid URI with a scheme
299   *
300   * @param root root directory path
301   * @return Passed <code>root</code> argument.
302   * @throws IOException if not a valid URI with a scheme
303   */
304  public static Path validateRootPath(Path root) throws IOException {
305    try {
306      URI rootURI = new URI(root.toString());
307      String scheme = rootURI.getScheme();
308      if (scheme == null) {
309        throw new IOException("Root directory does not have a scheme");
310      }
311      return root;
312    } catch (URISyntaxException e) {
313      IOException io = new IOException("Root directory path is not a valid " +
314        "URI -- check your " + HConstants.HBASE_DIR + " configuration");
315      io.initCause(e);
316      throw io;
317    }
318  }
319
320  /**
321   * Checks for the presence of the WAL log root path (using the provided conf object) in the given
322   * path. If it exists, this method removes it and returns the String representation of remaining
323   * relative path.
324   * @param path must not be null
325   * @param conf must not be null
326   * @return String representation of the remaining relative path
327   * @throws IOException from underlying filesystem
328   */
329  public static String removeWALRootPath(Path path, final Configuration conf) throws IOException {
330    Path root = getWALRootDir(conf);
331    String pathStr = path.toString();
332    // check that the path is absolute... it has the root path in it.
333    if (!pathStr.startsWith(root.toString())) {
334      return pathStr;
335    }
336    // if not, return as it is.
337    return pathStr.substring(root.toString().length() + 1);// remove the "/" too.
338  }
339
340  /**
341   * Return the 'path' component of a Path.  In Hadoop, Path is an URI.  This
342   * method returns the 'path' component of a Path's URI: e.g. If a Path is
343   * <code>hdfs://example.org:9000/hbase_trunk/TestTable/compaction.dir</code>,
344   * this method returns <code>/hbase_trunk/TestTable/compaction.dir</code>.
345   * This method is useful if you want to print out a Path without qualifying
346   * Filesystem instance.
347   * @param p Filesystem Path whose 'path' component we are to return.
348   * @return Path portion of the Filesystem
349   */
350  public static String getPath(Path p) {
351    return p.toUri().getPath();
352  }
353
354  /**
355   * @param c configuration
356   * @return {@link Path} to hbase root directory from
357   *     configuration as a qualified Path.
358   * @throws IOException e
359   */
360  public static Path getRootDir(final Configuration c) throws IOException {
361    Path p = new Path(c.get(HConstants.HBASE_DIR));
362    FileSystem fs = p.getFileSystem(c);
363    return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
364  }
365
366  public static void setRootDir(final Configuration c, final Path root) throws IOException {
367    c.set(HConstants.HBASE_DIR, root.toString());
368  }
369
370  public static void setFsDefault(final Configuration c, final Path root) throws IOException {
371    c.set("fs.defaultFS", root.toString());    // for hadoop 0.21+
372  }
373
374  public static FileSystem getRootDirFileSystem(final Configuration c) throws IOException {
375    Path p = getRootDir(c);
376    return p.getFileSystem(c);
377  }
378
379  /**
380   * @param c configuration
381   * @return {@link Path} to hbase log root directory: e.g. {@value HBASE_WAL_DIR} from
382   *     configuration as a qualified Path. Defaults to HBase root dir.
383   * @throws IOException e
384   */
385  public static Path getWALRootDir(final Configuration c) throws IOException {
386    Path p = new Path(c.get(HBASE_WAL_DIR, c.get(HConstants.HBASE_DIR)));
387    if (!isValidWALRootDir(p, c)) {
388      return getRootDir(c);
389    }
390    FileSystem fs = p.getFileSystem(c);
391    return p.makeQualified(fs.getUri(), fs.getWorkingDirectory());
392  }
393
394  @VisibleForTesting
395  public static void setWALRootDir(final Configuration c, final Path root) throws IOException {
396    c.set(HBASE_WAL_DIR, root.toString());
397  }
398
399  public static FileSystem getWALFileSystem(final Configuration c) throws IOException {
400    Path p = getWALRootDir(c);
401    FileSystem fs = p.getFileSystem(c);
402    // hadoop-core does fs caching, so need to propogate this if set
403    String enforceStreamCapability = c.get(UNSAFE_STREAM_CAPABILITY_ENFORCE);
404    if (enforceStreamCapability != null) {
405      fs.getConf().set(UNSAFE_STREAM_CAPABILITY_ENFORCE, enforceStreamCapability);
406    }
407    return fs;
408  }
409
410  private static boolean isValidWALRootDir(Path walDir, final Configuration c) throws IOException {
411    Path rootDir = getRootDir(c);
412    FileSystem fs = walDir.getFileSystem(c);
413    Path qualifiedWalDir = walDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
414    if (!qualifiedWalDir.equals(rootDir)) {
415      if (qualifiedWalDir.toString().startsWith(rootDir.toString() + "/")) {
416        throw new IllegalStateException("Illegal WAL directory specified. " +
417            "WAL directories are not permitted to be under the root directory if set.");
418      }
419    }
420    return true;
421  }
422
423  /**
424   * Returns the WAL region directory based on the given table name and region name
425   * @param conf configuration to determine WALRootDir
426   * @param tableName Table that the region is under
427   * @param encodedRegionName Region name used for creating the final region directory
428   * @return the region directory used to store WALs under the WALRootDir
429   * @throws IOException if there is an exception determining the WALRootDir
430   */
431  public static Path getWALRegionDir(final Configuration conf,
432      final TableName tableName, final String encodedRegionName)
433      throws IOException {
434    return new Path(getWALTableDir(conf, tableName),
435        encodedRegionName);
436  }
437
438  /**
439   * Returns the Table directory under the WALRootDir for the specified table name
440   * @param conf configuration used to get the WALRootDir
441   * @param tableName Table to get the directory for
442   * @return a path to the WAL table directory for the specified table
443   * @throws IOException if there is an exception determining the WALRootDir
444   */
445  public static Path getWALTableDir(final Configuration conf, final TableName tableName)
446      throws IOException {
447    return new Path(new Path(getWALRootDir(conf), tableName.getNamespaceAsString()),
448        tableName.getQualifierAsString());
449  }
450
451  /**
452   * Returns the {@link org.apache.hadoop.fs.Path} object representing the table directory under
453   * path rootdir
454   *
455   * @param rootdir qualified path of HBase root directory
456   * @param tableName name of table
457   * @return {@link org.apache.hadoop.fs.Path} for table
458   */
459  public static Path getTableDir(Path rootdir, final TableName tableName) {
460    return new Path(getNamespaceDir(rootdir, tableName.getNamespaceAsString()),
461        tableName.getQualifierAsString());
462  }
463
464  /**
465   * Returns the {@link org.apache.hadoop.hbase.TableName} object representing
466   * the table directory under
467   * path rootdir
468   *
469   * @param tablePath path of table
470   * @return {@link org.apache.hadoop.fs.Path} for table
471   */
472  public static TableName getTableName(Path tablePath) {
473    return TableName.valueOf(tablePath.getParent().getName(), tablePath.getName());
474  }
475
476  /**
477   * Returns the {@link org.apache.hadoop.fs.Path} object representing
478   * the namespace directory under path rootdir
479   *
480   * @param rootdir qualified path of HBase root directory
481   * @param namespace namespace name
482   * @return {@link org.apache.hadoop.fs.Path} for table
483   */
484  public static Path getNamespaceDir(Path rootdir, final String namespace) {
485    return new Path(rootdir, new Path(HConstants.BASE_NAMESPACE_DIR,
486        new Path(namespace)));
487  }
488
489  // this mapping means that under a federated FileSystem implementation, we'll
490  // only log the first failure from any of the underlying FileSystems at WARN and all others
491  // will be at DEBUG.
492  private static final Map<FileSystem, Boolean> warningMap =
493      new ConcurrentHashMap<FileSystem, Boolean>();
494
495  /**
496   * Sets storage policy for given path.
497   * If the passed path is a directory, we'll set the storage policy for all files
498   * created in the future in said directory. Note that this change in storage
499   * policy takes place at the FileSystem level; it will persist beyond this RS's lifecycle.
500   * If we're running on a version of FileSystem that doesn't support the given storage policy
501   * (or storage policies at all), then we'll issue a log message and continue.
502   *
503   * See http://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html
504   *
505   * @param fs We only do anything it implements a setStoragePolicy method
506   * @param path the Path whose storage policy is to be set
507   * @param storagePolicy Policy to set on <code>path</code>; see hadoop 2.6+
508   *   org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
509   *   'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
510   */
511  public static void setStoragePolicy(final FileSystem fs, final Path path,
512      final String storagePolicy) {
513    try {
514      setStoragePolicy(fs, path, storagePolicy, false);
515    } catch (IOException e) {
516      // should never arrive here
517      LOG.warn("We have chosen not to throw exception but some unexpectedly thrown out", e);
518    }
519  }
520
521  static void setStoragePolicy(final FileSystem fs, final Path path, final String storagePolicy,
522      boolean throwException) throws IOException {
523    if (storagePolicy == null) {
524      if (LOG.isTraceEnabled()) {
525        LOG.trace("We were passed a null storagePolicy, exiting early.");
526      }
527      return;
528    }
529    String trimmedStoragePolicy = storagePolicy.trim();
530    if (trimmedStoragePolicy.isEmpty()) {
531      if (LOG.isTraceEnabled()) {
532        LOG.trace("We were passed an empty storagePolicy, exiting early.");
533      }
534      return;
535    } else {
536      trimmedStoragePolicy = trimmedStoragePolicy.toUpperCase(Locale.ROOT);
537    }
538    if (trimmedStoragePolicy.equals(HConstants.DEFER_TO_HDFS_STORAGE_POLICY)) {
539      if (LOG.isTraceEnabled()) {
540        LOG.trace("We were passed the defer-to-hdfs policy {}, exiting early.",
541          trimmedStoragePolicy);
542      }
543      return;
544    }
545    try {
546      invokeSetStoragePolicy(fs, path, trimmedStoragePolicy);
547    } catch (IOException e) {
548      if (LOG.isTraceEnabled()) {
549        LOG.trace("Failed to invoke set storage policy API on FS", e);
550      }
551      if (throwException) {
552        throw e;
553      }
554    }
555  }
556
557  /*
558   * All args have been checked and are good. Run the setStoragePolicy invocation.
559   */
560  private static void invokeSetStoragePolicy(final FileSystem fs, final Path path,
561      final String storagePolicy) throws IOException {
562    Method m = null;
563    Exception toThrow = null;
564    try {
565      m = fs.getClass().getDeclaredMethod("setStoragePolicy",
566        new Class<?>[] { Path.class, String.class });
567      m.setAccessible(true);
568    } catch (NoSuchMethodException e) {
569      toThrow = e;
570      final String msg = "FileSystem doesn't support setStoragePolicy; HDFS-6584, HDFS-9345 " +
571          "not available. This is normal and expected on earlier Hadoop versions.";
572      if (!warningMap.containsKey(fs)) {
573        warningMap.put(fs, true);
574        LOG.warn(msg, e);
575      } else if (LOG.isDebugEnabled()) {
576        LOG.debug(msg, e);
577      }
578      m = null;
579    } catch (SecurityException e) {
580      toThrow = e;
581      final String msg = "No access to setStoragePolicy on FileSystem from the SecurityManager; " +
582          "HDFS-6584, HDFS-9345 not available. This is unusual and probably warrants an email " +
583          "to the user@hbase mailing list. Please be sure to include a link to your configs, and " +
584          "logs that include this message and period of time before it. Logs around service " +
585          "start up will probably be useful as well.";
586      if (!warningMap.containsKey(fs)) {
587        warningMap.put(fs, true);
588        LOG.warn(msg, e);
589      } else if (LOG.isDebugEnabled()) {
590        LOG.debug(msg, e);
591      }
592      m = null; // could happen on setAccessible() or getDeclaredMethod()
593    }
594    if (m != null) {
595      try {
596        m.invoke(fs, path, storagePolicy);
597        if (LOG.isDebugEnabled()) {
598          LOG.debug("Set storagePolicy=" + storagePolicy + " for path=" + path);
599        }
600      } catch (Exception e) {
601        toThrow = e;
602        // This swallows FNFE, should we be throwing it? seems more likely to indicate dev
603        // misuse than a runtime problem with HDFS.
604        if (!warningMap.containsKey(fs)) {
605          warningMap.put(fs, true);
606          LOG.warn("Unable to set storagePolicy=" + storagePolicy + " for path=" + path + ". " +
607              "DEBUG log level might have more details.", e);
608        } else if (LOG.isDebugEnabled()) {
609          LOG.debug("Unable to set storagePolicy=" + storagePolicy + " for path=" + path, e);
610        }
611        // check for lack of HDFS-7228
612        if (e instanceof InvocationTargetException) {
613          final Throwable exception = e.getCause();
614          if (exception instanceof RemoteException &&
615              HadoopIllegalArgumentException.class.getName().equals(
616                ((RemoteException)exception).getClassName())) {
617            if (LOG.isDebugEnabled()) {
618              LOG.debug("Given storage policy, '" +storagePolicy +"', was rejected and probably " +
619                "isn't a valid policy for the version of Hadoop you're running. I.e. if you're " +
620                "trying to use SSD related policies then you're likely missing HDFS-7228. For " +
621                "more information see the 'ArchivalStorage' docs for your Hadoop release.");
622            }
623          // Hadoop 2.8+, 3.0-a1+ added FileSystem.setStoragePolicy with a default implementation
624          // that throws UnsupportedOperationException
625          } else if (exception instanceof UnsupportedOperationException) {
626            if (LOG.isDebugEnabled()) {
627              LOG.debug("The underlying FileSystem implementation doesn't support " +
628                  "setStoragePolicy. This is probably intentional on their part, since HDFS-9345 " +
629                  "appears to be present in your version of Hadoop. For more information check " +
630                  "the Hadoop documentation on 'ArchivalStorage', the Hadoop FileSystem " +
631                  "specification docs from HADOOP-11981, and/or related documentation from the " +
632                  "provider of the underlying FileSystem (its name should appear in the " +
633                  "stacktrace that accompanies this message). Note in particular that Hadoop's " +
634                  "local filesystem implementation doesn't support storage policies.", exception);
635            }
636          }
637        }
638      }
639    }
640    if (toThrow != null) {
641      throw new IOException(toThrow);
642    }
643  }
644
645  /**
646   * @param conf must not be null
647   * @return True if this filesystem whose scheme is 'hdfs'.
648   * @throws IOException from underlying FileSystem
649   */
650  public static boolean isHDFS(final Configuration conf) throws IOException {
651    FileSystem fs = FileSystem.get(conf);
652    String scheme = fs.getUri().getScheme();
653    return scheme.equalsIgnoreCase("hdfs");
654  }
655
656  /**
657   * Checks if the given path is the one with 'recovered.edits' dir.
658   * @param path must not be null
659   * @return True if we recovered edits
660   */
661  public static boolean isRecoveredEdits(Path path) {
662    return path.toString().contains(HConstants.RECOVERED_EDITS_DIR);
663  }
664
665  /**
666   * @param conf must not be null
667   * @return Returns the filesystem of the hbase rootdir.
668   * @throws IOException from underlying FileSystem
669   */
670  public static FileSystem getCurrentFileSystem(Configuration conf)
671  throws IOException {
672    return getRootDir(conf).getFileSystem(conf);
673  }
674
675  /**
676   * Calls fs.listStatus() and treats FileNotFoundException as non-fatal
677   * This accommodates differences between hadoop versions, where hadoop 1
678   * does not throw a FileNotFoundException, and return an empty FileStatus[]
679   * while Hadoop 2 will throw FileNotFoundException.
680   *
681   * Where possible, prefer FSUtils#listStatusWithStatusFilter(FileSystem,
682   * Path, FileStatusFilter) instead.
683   *
684   * @param fs file system
685   * @param dir directory
686   * @param filter path filter
687   * @return null if dir is empty or doesn't exist, otherwise FileStatus array
688   */
689  public static FileStatus [] listStatus(final FileSystem fs,
690      final Path dir, final PathFilter filter) throws IOException {
691    FileStatus [] status = null;
692    try {
693      status = filter == null ? fs.listStatus(dir) : fs.listStatus(dir, filter);
694    } catch (FileNotFoundException fnfe) {
695      // if directory doesn't exist, return null
696      if (LOG.isTraceEnabled()) {
697        LOG.trace(dir + " doesn't exist");
698      }
699    }
700    if (status == null || status.length < 1) {
701      return null;
702    }
703    return status;
704  }
705
706  /**
707   * Calls fs.listStatus() and treats FileNotFoundException as non-fatal
708   * This would accommodates differences between hadoop versions
709   *
710   * @param fs file system
711   * @param dir directory
712   * @return null if dir is empty or doesn't exist, otherwise FileStatus array
713   */
714  public static FileStatus[] listStatus(final FileSystem fs, final Path dir) throws IOException {
715    return listStatus(fs, dir, null);
716  }
717
718  /**
719   * Calls fs.listFiles() to get FileStatus and BlockLocations together for reducing rpc call
720   *
721   * @param fs file system
722   * @param dir directory
723   * @return LocatedFileStatus list
724   */
725  public static List<LocatedFileStatus> listLocatedStatus(final FileSystem fs,
726      final Path dir) throws IOException {
727    List<LocatedFileStatus> status = null;
728    try {
729      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs
730          .listFiles(dir, false);
731      while (locatedFileStatusRemoteIterator.hasNext()) {
732        if (status == null) {
733          status = Lists.newArrayList();
734        }
735        status.add(locatedFileStatusRemoteIterator.next());
736      }
737    } catch (FileNotFoundException fnfe) {
738      // if directory doesn't exist, return null
739      if (LOG.isTraceEnabled()) {
740        LOG.trace(dir + " doesn't exist");
741      }
742    }
743    return status;
744  }
745
746  /**
747   * Calls fs.delete() and returns the value returned by the fs.delete()
748   *
749   * @param fs must not be null
750   * @param path must not be null
751   * @param recursive delete tree rooted at path
752   * @return the value returned by the fs.delete()
753   * @throws IOException from underlying FileSystem
754   */
755  public static boolean delete(final FileSystem fs, final Path path, final boolean recursive)
756      throws IOException {
757    return fs.delete(path, recursive);
758  }
759
760  /**
761   * Calls fs.exists(). Checks if the specified path exists
762   *
763   * @param fs must not be null
764   * @param path must not be null
765   * @return the value returned by fs.exists()
766   * @throws IOException from underlying FileSystem
767   */
768  public static boolean isExists(final FileSystem fs, final Path path) throws IOException {
769    return fs.exists(path);
770  }
771
772  /**
773   * Log the current state of the filesystem from a certain root directory
774   * @param fs filesystem to investigate
775   * @param root root file/directory to start logging from
776   * @param LOG log to output information
777   * @throws IOException if an unexpected exception occurs
778   */
779  public static void logFileSystemState(final FileSystem fs, final Path root, Logger LOG)
780      throws IOException {
781    LOG.debug("File system contents for path " + root);
782    logFSTree(LOG, fs, root, "|-");
783  }
784
785  /**
786   * Recursive helper to log the state of the FS
787   *
788   * @see #logFileSystemState(FileSystem, Path, Logger)
789   */
790  private static void logFSTree(Logger LOG, final FileSystem fs, final Path root, String prefix)
791      throws IOException {
792    FileStatus[] files = listStatus(fs, root, null);
793    if (files == null) {
794      return;
795    }
796
797    for (FileStatus file : files) {
798      if (file.isDirectory()) {
799        LOG.debug(prefix + file.getPath().getName() + "/");
800        logFSTree(LOG, fs, file.getPath(), prefix + "---");
801      } else {
802        LOG.debug(prefix + file.getPath().getName());
803      }
804    }
805  }
806
807  public static boolean renameAndSetModifyTime(final FileSystem fs, final Path src, final Path dest)
808      throws IOException {
809    // set the modify time for TimeToLive Cleaner
810    fs.setTimes(src, EnvironmentEdgeManager.currentTime(), -1);
811    return fs.rename(src, dest);
812  }
813
814  /**
815   * Do our short circuit read setup.
816   * Checks buffer size to use and whether to do checksumming in hbase or hdfs.
817   * @param conf must not be null
818   */
819  public static void setupShortCircuitRead(final Configuration conf) {
820    // Check that the user has not set the "dfs.client.read.shortcircuit.skip.checksum" property.
821    boolean shortCircuitSkipChecksum =
822      conf.getBoolean("dfs.client.read.shortcircuit.skip.checksum", false);
823    boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
824    if (shortCircuitSkipChecksum) {
825      LOG.warn("Configuration \"dfs.client.read.shortcircuit.skip.checksum\" should not " +
826        "be set to true." + (useHBaseChecksum ? " HBase checksum doesn't require " +
827        "it, see https://issues.apache.org/jira/browse/HBASE-6868." : ""));
828      assert !shortCircuitSkipChecksum; //this will fail if assertions are on
829    }
830    checkShortCircuitReadBufferSize(conf);
831  }
832
833  /**
834   * Check if short circuit read buffer size is set and if not, set it to hbase value.
835   * @param conf must not be null
836   */
837  public static void checkShortCircuitReadBufferSize(final Configuration conf) {
838    final int defaultSize = HConstants.DEFAULT_BLOCKSIZE * 2;
839    final int notSet = -1;
840    // DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY is only defined in h2
841    final String dfsKey = "dfs.client.read.shortcircuit.buffer.size";
842    int size = conf.getInt(dfsKey, notSet);
843    // If a size is set, return -- we will use it.
844    if (size != notSet) {
845      return;
846    }
847    // But short circuit buffer size is normally not set.  Put in place the hbase wanted size.
848    int hbaseSize = conf.getInt("hbase." + dfsKey, defaultSize);
849    conf.setIfUnset(dfsKey, Integer.toString(hbaseSize));
850  }
851
852  private static class DfsBuilderUtility {
853    static Class<?> dfsClass = null;
854    static Method createMethod;
855    static Method overwriteMethod;
856    static Method bufferSizeMethod;
857    static Method blockSizeMethod;
858    static Method recursiveMethod;
859    static Method replicateMethod;
860    static Method replicationMethod;
861    static Method buildMethod;
862    static boolean allMethodsPresent = false;
863
864    static {
865      String dfsName = "org.apache.hadoop.hdfs.DistributedFileSystem";
866      String builderName = dfsName + "$HdfsDataOutputStreamBuilder";
867      Class<?> builderClass = null;
868
869      try {
870        dfsClass = Class.forName(dfsName);
871      } catch (ClassNotFoundException e) {
872        LOG.debug("{} not available, will not use builder API for file creation.", dfsName);
873      }
874      try {
875        builderClass = Class.forName(builderName);
876      } catch (ClassNotFoundException e) {
877        LOG.debug("{} not available, will not use builder API for file creation.", builderName);
878      }
879
880      if (dfsClass != null && builderClass != null) {
881        try {
882          createMethod = dfsClass.getMethod("createFile", Path.class);
883          overwriteMethod = builderClass.getMethod("overwrite", boolean.class);
884          bufferSizeMethod = builderClass.getMethod("bufferSize", int.class);
885          blockSizeMethod = builderClass.getMethod("blockSize", long.class);
886          recursiveMethod = builderClass.getMethod("recursive");
887          replicateMethod = builderClass.getMethod("replicate");
888          replicationMethod = builderClass.getMethod("replication", short.class);
889          buildMethod = builderClass.getMethod("build");
890
891          allMethodsPresent = true;
892          LOG.debug("Using builder API via reflection for DFS file creation.");
893        } catch (NoSuchMethodException e) {
894          LOG.debug("Could not find method on builder; will use old DFS API for file creation {}",
895              e.getMessage());
896        }
897      }
898    }
899
900    /**
901     * Attempt to use builder API via reflection to create a file with the given parameters and
902     * replication enabled.
903     */
904    static FSDataOutputStream createHelper(FileSystem fs, Path path, boolean overwritable,
905        int bufferSize, short replication, long blockSize, boolean isRecursive) throws IOException {
906      if (allMethodsPresent && dfsClass.isInstance(fs)) {
907        try {
908          Object builder;
909
910          builder = createMethod.invoke(fs, path);
911          builder = overwriteMethod.invoke(builder, overwritable);
912          builder = bufferSizeMethod.invoke(builder, bufferSize);
913          builder = blockSizeMethod.invoke(builder, blockSize);
914          if (isRecursive) {
915            builder = recursiveMethod.invoke(builder);
916          }
917          builder = replicateMethod.invoke(builder);
918          builder = replicationMethod.invoke(builder, replication);
919          return (FSDataOutputStream) buildMethod.invoke(builder);
920        } catch (IllegalAccessException | InvocationTargetException e) {
921          // Should have caught this failure during initialization, so log full trace here
922          LOG.warn("Couldn't use reflection with builder API", e);
923        }
924      }
925
926      if (isRecursive) {
927        return fs.create(path, overwritable, bufferSize, replication, blockSize, null);
928      }
929      return fs.createNonRecursive(path, overwritable, bufferSize, replication, blockSize, null);
930    }
931
932    /**
933     * Attempt to use builder API via reflection to create a file with the given parameters and
934     * replication enabled.
935     */
936    static FSDataOutputStream createHelper(FileSystem fs, Path path, boolean overwritable)
937        throws IOException {
938      if (allMethodsPresent && dfsClass.isInstance(fs)) {
939        try {
940          Object builder;
941
942          builder = createMethod.invoke(fs, path);
943          builder = overwriteMethod.invoke(builder, overwritable);
944          builder = replicateMethod.invoke(builder);
945          return (FSDataOutputStream) buildMethod.invoke(builder);
946        } catch (IllegalAccessException | InvocationTargetException e) {
947          // Should have caught this failure during initialization, so log full trace here
948          LOG.warn("Couldn't use reflection with builder API", e);
949        }
950      }
951
952      return fs.create(path, overwritable);
953    }
954  }
955
956  /**
957   * Attempt to use builder API via reflection to create a file with the given parameters and
958   * replication enabled.
959   * <p>
960   * Will not attempt to enable replication when passed an HFileSystem.
961   */
962  public static FSDataOutputStream createForWal(FileSystem fs, Path path, boolean overwritable)
963      throws IOException {
964    return DfsBuilderUtility.createHelper(fs, path, overwritable);
965  }
966
967  /**
968   * Attempt to use builder API via reflection to create a file with the given parameters and
969   * replication enabled.
970   * <p>
971   * Will not attempt to enable replication when passed an HFileSystem.
972   */
973  public static FSDataOutputStream createForWal(FileSystem fs, Path path, boolean overwritable,
974      int bufferSize, short replication, long blockSize, boolean isRecursive) throws IOException {
975    return DfsBuilderUtility.createHelper(fs, path, overwritable, bufferSize, replication,
976        blockSize, isRecursive);
977  }
978
979  // Holder singleton idiom. JVM spec ensures this will be run at most once per Classloader, and
980  // not until we attempt to reference it.
981  private static class StreamCapabilities {
982    public static final boolean PRESENT;
983    public static final Class<?> CLASS;
984    public static final Method METHOD;
985    static {
986      boolean tmp = false;
987      Class<?> clazz = null;
988      Method method = null;
989      try {
990        clazz = Class.forName("org.apache.hadoop.fs.StreamCapabilities");
991        method = clazz.getMethod("hasCapability", String.class);
992        tmp = true;
993      } catch(ClassNotFoundException|NoSuchMethodException|SecurityException exception) {
994        LOG.warn("Your Hadoop installation does not include the StreamCapabilities class from " +
995                 "HDFS-11644, so we will skip checking if any FSDataOutputStreams actually " +
996                 "support hflush/hsync. If you are running on top of HDFS this probably just " +
997                 "means you have an older version and this can be ignored. If you are running on " +
998                 "top of an alternate FileSystem implementation you should manually verify that " +
999                 "hflush and hsync are implemented; otherwise you risk data loss and hard to " +
1000                 "diagnose errors when our assumptions are violated.");
1001        LOG.debug("The first request to check for StreamCapabilities came from this stacktrace.",
1002            exception);
1003      } finally {
1004        PRESENT = tmp;
1005        CLASS = clazz;
1006        METHOD = method;
1007      }
1008    }
1009  }
1010
1011  /**
1012   * If our FileSystem version includes the StreamCapabilities class, check if
1013   * the given stream has a particular capability.
1014   * @param stream capabilities are per-stream instance, so check this one specifically. must not be
1015   *        null
1016   * @param capability what to look for, per Hadoop Common's FileSystem docs
1017   * @return true if there are no StreamCapabilities. false if there are, but this stream doesn't
1018   *         implement it. return result of asking the stream otherwise.
1019   */
1020  public static boolean hasCapability(FSDataOutputStream stream, String capability) {
1021    // be consistent whether or not StreamCapabilities is present
1022    if (stream == null) {
1023      throw new NullPointerException("stream parameter must not be null.");
1024    }
1025    // If o.a.h.fs.StreamCapabilities doesn't exist, assume everyone does everything
1026    // otherwise old versions of Hadoop will break.
1027    boolean result = true;
1028    if (StreamCapabilities.PRESENT) {
1029      // if StreamCapabilities is present, but the stream doesn't implement it
1030      // or we run into a problem invoking the method,
1031      // we treat that as equivalent to not declaring anything
1032      result = false;
1033      if (StreamCapabilities.CLASS.isAssignableFrom(stream.getClass())) {
1034        try {
1035          result = ((Boolean)StreamCapabilities.METHOD.invoke(stream, capability)).booleanValue();
1036        } catch (IllegalAccessException|IllegalArgumentException|InvocationTargetException
1037            exception) {
1038          LOG.warn("Your Hadoop installation's StreamCapabilities implementation doesn't match " +
1039              "our understanding of how it's supposed to work. Please file a JIRA and include " +
1040              "the following stack trace. In the mean time we're interpreting this behavior " +
1041              "difference as a lack of capability support, which will probably cause a failure.",
1042              exception);
1043        }
1044      }
1045    }
1046    return result;
1047  }
1048
1049  /**
1050   * Helper exception for those cases where the place where we need to check a stream capability
1051   * is not where we have the needed context to explain the impact and mitigation for a lack.
1052   */
1053  public static class StreamLacksCapabilityException extends Exception {
1054    public StreamLacksCapabilityException(String message, Throwable cause) {
1055      super(message, cause);
1056    }
1057    public StreamLacksCapabilityException(String message) {
1058      super(message);
1059    }
1060  }
1061
1062}