001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.regionserver;
021
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.util.concurrent.atomic.AtomicInteger;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.fs.FileStatus;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.HDFSBlocksDistribution;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
037import org.apache.hadoop.hbase.io.HFileLink;
038import org.apache.hadoop.hbase.io.HalfStoreFileReader;
039import org.apache.hadoop.hbase.io.Reference;
040import org.apache.hadoop.hbase.io.hfile.CacheConfig;
041import org.apache.hadoop.hbase.util.FSUtils;
042
043/**
044 * Describe a StoreFile (hfile, reference, link)
045 */
046@InterfaceAudience.Private
047public class StoreFileInfo {
048  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);
049
050  /**
051   * A non-capture group, for hfiles, so that this can be embedded.
052   * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
053   * The mob del file has (_del) as suffix.
054   */
055  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
056
057  /** Regex that will work for hfiles */
058  private static final Pattern HFILE_NAME_PATTERN =
059    Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
060
061  /**
062   * A non-capture group, for del files, so that this can be embedded.
063   * A del file has (_del) as suffix.
064   */
065  public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
066
067  /** Regex that will work for del files */
068  private static final Pattern DELFILE_NAME_PATTERN =
069    Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
070
071  /**
072   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
073   * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
074   * If reference, then the regex has more than just one group.
075   * Group 1, hfile/hfilelink pattern, is this file's id.
076   * Group 2 '(.+)' is the reference's parent region name.
077   */
078  private static final Pattern REF_NAME_PATTERN =
079    Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
080      HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
081
082  // Configuration
083  private Configuration conf;
084
085  // FileSystem handle
086  private final FileSystem fs;
087
088  // HDFS blocks distribution information
089  private HDFSBlocksDistribution hdfsBlocksDistribution = null;
090
091  // If this storefile references another, this is the reference instance.
092  private final Reference reference;
093
094  // If this storefile is a link to another, this is the link instance.
095  private final HFileLink link;
096
097  private final Path initialPath;
098
099  private RegionCoprocessorHost coprocessorHost;
100
101  // timestamp on when the file was created, is 0 and ignored for reference or link files
102  private long createdTimestamp;
103
104  /**
105   * Create a Store File Info
106   * @param conf the {@link Configuration} to use
107   * @param fs The current file system to use.
108   * @param initialPath The {@link Path} of the file
109   */
110  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
111      throws IOException {
112    assert fs != null;
113    assert initialPath != null;
114    assert conf != null;
115
116    this.fs = fs;
117    this.conf = conf;
118    this.initialPath = initialPath;
119    Path p = initialPath;
120    if (HFileLink.isHFileLink(p)) {
121      // HFileLink
122      this.reference = null;
123      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
124      if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
125    } else if (isReference(p)) {
126      this.reference = Reference.read(fs, p);
127      Path referencePath = getReferredToFile(p);
128      if (HFileLink.isHFileLink(referencePath)) {
129        // HFileLink Reference
130        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
131      } else {
132        // Reference
133        this.link = null;
134      }
135      if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
136              " reference to " + referencePath);
137    } else if (isHFile(p)) {
138      // HFile
139      this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime();
140      this.reference = null;
141      this.link = null;
142    } else {
143      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
144    }
145  }
146
147  /**
148   * Create a Store File Info
149   * @param conf the {@link Configuration} to use
150   * @param fs The current file system to use.
151   * @param fileStatus The {@link FileStatus} of the file
152   */
153  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
154      throws IOException {
155    this(conf, fs, fileStatus.getPath());
156  }
157
158  /**
159   * Create a Store File Info from an HFileLink
160   * @param conf The {@link Configuration} to use
161   * @param fs The current file system to use
162   * @param fileStatus The {@link FileStatus} of the file
163   */
164  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
165      final HFileLink link) {
166    this.fs = fs;
167    this.conf = conf;
168    // initialPath can be null only if we get a link.
169    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
170      // HFileLink
171    this.reference = null;
172    this.link = link;
173  }
174
175  /**
176   * Create a Store File Info from an HFileLink
177   * @param conf The {@link Configuration} to use
178   * @param fs The current file system to use
179   * @param fileStatus The {@link FileStatus} of the file
180   * @param reference The reference instance
181   */
182  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
183      final Reference reference) {
184    this.fs = fs;
185    this.conf = conf;
186    this.initialPath = fileStatus.getPath();
187    this.createdTimestamp = fileStatus.getModificationTime();
188    this.reference = reference;
189    this.link = null;
190  }
191
192  /**
193   * Create a Store File Info from an HFileLink and a Reference
194   * @param conf The {@link Configuration} to use
195   * @param fs The current file system to use
196   * @param fileStatus The {@link FileStatus} of the file
197   * @param reference The reference instance
198   * @param link The link instance
199   */
200  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
201      final Reference reference, final HFileLink link) {
202    this.fs = fs;
203    this.conf = conf;
204    this.initialPath = fileStatus.getPath();
205    this.createdTimestamp = fileStatus.getModificationTime();
206    this.reference = reference;
207    this.link = link;
208  }
209
210  /**
211   * Sets the region coprocessor env.
212   * @param coprocessorHost
213   */
214  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
215    this.coprocessorHost = coprocessorHost;
216  }
217
218  /*
219   * @return the Reference object associated to this StoreFileInfo.
220   *         null if the StoreFile is not a reference.
221   */
222  public Reference getReference() {
223    return this.reference;
224  }
225
226  /** @return True if the store file is a Reference */
227  public boolean isReference() {
228    return this.reference != null;
229  }
230
231  /** @return True if the store file is a top Reference */
232  public boolean isTopReference() {
233    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
234  }
235
236  /** @return True if the store file is a link */
237  public boolean isLink() {
238    return this.link != null && this.reference == null;
239  }
240
241  /** @return the HDFS block distribution */
242  public HDFSBlocksDistribution getHDFSBlockDistribution() {
243    return this.hdfsBlocksDistribution;
244  }
245
246  /**
247   * Open a Reader for the StoreFile
248   * @param fs The current file system to use.
249   * @param cacheConf The cache configuration and block cache reference.
250   * @return The StoreFile.Reader for the file
251   */
252  public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind,
253      long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared)
254      throws IOException {
255    FSDataInputStreamWrapper in;
256    FileStatus status;
257
258    final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
259    if (this.link != null) {
260      // HFileLink
261      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
262      status = this.link.getFileStatus(fs);
263    } else if (this.reference != null) {
264      // HFile Reference
265      Path referencePath = getReferredToFile(this.getPath());
266      in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
267      status = fs.getFileStatus(referencePath);
268    } else {
269      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
270      status = fs.getFileStatus(initialPath);
271    }
272    long length = status.getLen();
273    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
274
275    StoreFileReader reader = null;
276    if (this.coprocessorHost != null) {
277      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
278        cacheConf, reference);
279    }
280    if (reader == null) {
281      if (this.reference != null) {
282        reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
283            isPrimaryReplicaStoreFile, refCount, shared, conf);
284      } else {
285        reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf,
286            isPrimaryReplicaStoreFile, refCount, shared, conf);
287      }
288    }
289    if (this.coprocessorHost != null) {
290      reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
291        cacheConf, reference, reader);
292    }
293    return reader;
294  }
295
296  /**
297   * Compute the HDFS Block Distribution for this StoreFile
298   */
299  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
300      throws IOException {
301    // guard against the case where we get the FileStatus from link, but by the time we
302    // call compute the file is moved again
303    if (this.link != null) {
304      FileNotFoundException exToThrow = null;
305      for (int i = 0; i < this.link.getLocations().length; i++) {
306        try {
307          return computeHDFSBlocksDistributionInternal(fs);
308        } catch (FileNotFoundException ex) {
309          // try the other location
310          exToThrow = ex;
311        }
312      }
313      throw exToThrow;
314    } else {
315      return computeHDFSBlocksDistributionInternal(fs);
316    }
317  }
318
319  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
320      throws IOException {
321    FileStatus status = getReferencedFileStatus(fs);
322    if (this.reference != null) {
323      return computeRefFileHDFSBlockDistribution(fs, reference, status);
324    } else {
325      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
326    }
327  }
328
329  /**
330   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
331   * @param fs The current file system to use.
332   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
333   */
334  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
335    FileStatus status;
336    if (this.reference != null) {
337      if (this.link != null) {
338        FileNotFoundException exToThrow = null;
339        for (int i = 0; i < this.link.getLocations().length; i++) {
340          // HFileLink Reference
341          try {
342            return link.getFileStatus(fs);
343          } catch (FileNotFoundException ex) {
344            // try the other location
345            exToThrow = ex;
346          }
347        }
348        throw exToThrow;
349      } else {
350        // HFile Reference
351        Path referencePath = getReferredToFile(this.getPath());
352        status = fs.getFileStatus(referencePath);
353      }
354    } else {
355      if (this.link != null) {
356        FileNotFoundException exToThrow = null;
357        for (int i = 0; i < this.link.getLocations().length; i++) {
358          // HFileLink
359          try {
360            return link.getFileStatus(fs);
361          } catch (FileNotFoundException ex) {
362            // try the other location
363            exToThrow = ex;
364          }
365        }
366        throw exToThrow;
367      } else {
368        status = fs.getFileStatus(initialPath);
369      }
370    }
371    return status;
372  }
373
374  /** @return The {@link Path} of the file */
375  public Path getPath() {
376    return initialPath;
377  }
378
379  /** @return The {@link FileStatus} of the file */
380  public FileStatus getFileStatus() throws IOException {
381    return getReferencedFileStatus(fs);
382  }
383
384  /** @return Get the modification time of the file. */
385  public long getModificationTime() throws IOException {
386    return getFileStatus().getModificationTime();
387  }
388
389  @Override
390  public String toString() {
391    return this.getPath() +
392      (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
393  }
394
395  /**
396   * @param path Path to check.
397   * @return True if the path has format of a HFile.
398   */
399  public static boolean isHFile(final Path path) {
400    return isHFile(path.getName());
401  }
402
403  public static boolean isHFile(final String fileName) {
404    Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
405    return m.matches() && m.groupCount() > 0;
406  }
407
408  /**
409   * @param path Path to check.
410   * @return True if the path has format of a del file.
411   */
412  public static boolean isDelFile(final Path path) {
413    return isDelFile(path.getName());
414  }
415
416  /**
417   * @param fileName Sting version of path to validate.
418   * @return True if the file name has format of a del file.
419   */
420  public static boolean isDelFile(final String fileName) {
421    Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
422    return m.matches() && m.groupCount() > 0;
423  }
424
425  /**
426   * @param path Path to check.
427   * @return True if the path has format of a HStoreFile reference.
428   */
429  public static boolean isReference(final Path path) {
430    return isReference(path.getName());
431  }
432
433  /**
434   * @param name file name to check.
435   * @return True if the path has format of a HStoreFile reference.
436   */
437  public static boolean isReference(final String name) {
438    Matcher m = REF_NAME_PATTERN.matcher(name);
439    return m.matches() && m.groupCount() > 1;
440  }
441
442  /**
443   * @return timestamp when this file was created (as returned by filesystem)
444   */
445  public long getCreatedTimestamp() {
446    return createdTimestamp;
447  }
448
449  /*
450   * Return path to the file referred to by a Reference.  Presumes a directory
451   * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
452   * @param p Path to a Reference file.
453   * @return Calculated path to parent region file.
454   * @throws IllegalArgumentException when path regex fails to match.
455   */
456  public static Path getReferredToFile(final Path p) {
457    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
458    if (m == null || !m.matches()) {
459      LOG.warn("Failed match of store file name " + p.toString());
460      throw new IllegalArgumentException("Failed match of store file name " +
461          p.toString());
462    }
463
464    // Other region name is suffix on the passed Reference file name
465    String otherRegion = m.group(2);
466    // Tabledir is up two directories from where Reference was written.
467    Path tableDir = p.getParent().getParent().getParent();
468    String nameStrippedOfSuffix = m.group(1);
469    if (LOG.isTraceEnabled()) {
470      LOG.trace("reference '" + p + "' to region=" + otherRegion
471        + " hfile=" + nameStrippedOfSuffix);
472    }
473
474    // Build up new path with the referenced region in place of our current
475    // region in the reference path.  Also strip regionname suffix from name.
476    return new Path(new Path(new Path(tableDir, otherRegion),
477      p.getParent().getName()), nameStrippedOfSuffix);
478  }
479
480  /**
481   * Validate the store file name.
482   * @param fileName name of the file to validate
483   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
484   */
485  public static boolean validateStoreFileName(final String fileName) {
486    if (HFileLink.isHFileLink(fileName) || isReference(fileName))
487      return(true);
488    return !fileName.contains("-");
489  }
490
491  /**
492   * Return if the specified file is a valid store file or not.
493   * @param fileStatus The {@link FileStatus} of the file
494   * @return <tt>true</tt> if the file is valid
495   */
496  public static boolean isValid(final FileStatus fileStatus)
497      throws IOException {
498    final Path p = fileStatus.getPath();
499
500    if (fileStatus.isDirectory())
501      return false;
502
503    // Check for empty hfile. Should never be the case but can happen
504    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
505    // NOTE: that the HFileLink is just a name, so it's an empty file.
506    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
507      LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
508      return false;
509    }
510
511    return validateStoreFileName(p.getName());
512  }
513
514  /**
515   * helper function to compute HDFS blocks distribution of a given reference
516   * file.For reference file, we don't compute the exact value. We use some
517   * estimate instead given it might be good enough. we assume bottom part
518   * takes the first half of reference file, top part takes the second half
519   * of the reference file. This is just estimate, given
520   * midkey ofregion != midkey of HFile, also the number and size of keys vary.
521   * If this estimate isn't good enough, we can improve it later.
522   * @param fs  The FileSystem
523   * @param reference  The reference
524   * @param status  The reference FileStatus
525   * @return HDFS blocks distribution
526   */
527  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
528      final FileSystem fs, final Reference reference, final FileStatus status)
529      throws IOException {
530    if (status == null) {
531      return null;
532    }
533
534    long start = 0;
535    long length = 0;
536
537    if (Reference.isTopFileRegion(reference.getFileRegion())) {
538      start = status.getLen()/2;
539      length = status.getLen() - status.getLen()/2;
540    } else {
541      start = 0;
542      length = status.getLen()/2;
543    }
544    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
545  }
546
547  @Override
548  public boolean equals(Object that) {
549    if (this == that) return true;
550    if (that == null) return false;
551
552    if (!(that instanceof StoreFileInfo)) return false;
553
554    StoreFileInfo o = (StoreFileInfo)that;
555    if (initialPath != null && o.initialPath == null) return false;
556    if (initialPath == null && o.initialPath != null) return false;
557    if (initialPath != o.initialPath && initialPath != null
558            && !initialPath.equals(o.initialPath)) return false;
559
560    if (reference != null && o.reference == null) return false;
561    if (reference == null && o.reference != null) return false;
562    if (reference != o.reference && reference != null
563            && !reference.equals(o.reference)) return false;
564
565    if (link != null && o.link == null) return false;
566    if (link == null && o.link != null) return false;
567    if (link != o.link && link != null && !link.equals(o.link)) return false;
568
569    return true;
570  };
571
572
573  @Override
574  public int hashCode() {
575    int hash = 17;
576    hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
577    hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
578    hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
579    return  hash;
580  }
581
582  /**
583   * Return the active file name that contains the real data.
584   * <p>
585   * For referenced hfile, we will return the name of the reference file as it will be used to
586   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
587   * linked.
588   */
589  public String getActiveFileName() {
590    if (reference != null || link == null) {
591      return initialPath.getName();
592    } else {
593      return HFileLink.getReferencedHFileName(initialPath.getName());
594    }
595  }
596}