001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.regionserver; 021 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.util.concurrent.atomic.AtomicInteger; 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.fs.FileStatus; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HDFSBlocksDistribution; 033import org.apache.yetus.audience.InterfaceAudience; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 037import org.apache.hadoop.hbase.io.HFileLink; 038import org.apache.hadoop.hbase.io.HalfStoreFileReader; 039import org.apache.hadoop.hbase.io.Reference; 040import org.apache.hadoop.hbase.io.hfile.CacheConfig; 041import org.apache.hadoop.hbase.util.FSUtils; 042 043/** 044 * Describe a StoreFile (hfile, reference, link) 045 */ 046@InterfaceAudience.Private 047public class StoreFileInfo { 048 private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class); 049 050 /** 051 * A non-capture group, for hfiles, so that this can be embedded. 052 * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix. 053 * The mob del file has (_del) as suffix. 054 */ 055 public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?"; 056 057 /** Regex that will work for hfiles */ 058 private static final Pattern HFILE_NAME_PATTERN = 059 Pattern.compile("^(" + HFILE_NAME_REGEX + ")"); 060 061 /** 062 * A non-capture group, for del files, so that this can be embedded. 063 * A del file has (_del) as suffix. 064 */ 065 public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)"; 066 067 /** Regex that will work for del files */ 068 private static final Pattern DELFILE_NAME_PATTERN = 069 Pattern.compile("^(" + DELFILE_NAME_REGEX + ")"); 070 071 /** 072 * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>}) 073 * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>}) 074 * If reference, then the regex has more than just one group. 075 * Group 1, hfile/hfilelink pattern, is this file's id. 076 * Group 2 '(.+)' is the reference's parent region name. 077 */ 078 private static final Pattern REF_NAME_PATTERN = 079 Pattern.compile(String.format("^(%s|%s)\\.(.+)$", 080 HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX)); 081 082 // Configuration 083 private Configuration conf; 084 085 // FileSystem handle 086 private final FileSystem fs; 087 088 // HDFS blocks distribution information 089 private HDFSBlocksDistribution hdfsBlocksDistribution = null; 090 091 // If this storefile references another, this is the reference instance. 092 private final Reference reference; 093 094 // If this storefile is a link to another, this is the link instance. 095 private final HFileLink link; 096 097 private final Path initialPath; 098 099 private RegionCoprocessorHost coprocessorHost; 100 101 // timestamp on when the file was created, is 0 and ignored for reference or link files 102 private long createdTimestamp; 103 104 /** 105 * Create a Store File Info 106 * @param conf the {@link Configuration} to use 107 * @param fs The current file system to use. 108 * @param initialPath The {@link Path} of the file 109 */ 110 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath) 111 throws IOException { 112 assert fs != null; 113 assert initialPath != null; 114 assert conf != null; 115 116 this.fs = fs; 117 this.conf = conf; 118 this.initialPath = initialPath; 119 Path p = initialPath; 120 if (HFileLink.isHFileLink(p)) { 121 // HFileLink 122 this.reference = null; 123 this.link = HFileLink.buildFromHFileLinkPattern(conf, p); 124 if (LOG.isTraceEnabled()) LOG.trace(p + " is a link"); 125 } else if (isReference(p)) { 126 this.reference = Reference.read(fs, p); 127 Path referencePath = getReferredToFile(p); 128 if (HFileLink.isHFileLink(referencePath)) { 129 // HFileLink Reference 130 this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath); 131 } else { 132 // Reference 133 this.link = null; 134 } 135 if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() + 136 " reference to " + referencePath); 137 } else if (isHFile(p)) { 138 // HFile 139 this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime(); 140 this.reference = null; 141 this.link = null; 142 } else { 143 throw new IOException("path=" + p + " doesn't look like a valid StoreFile"); 144 } 145 } 146 147 /** 148 * Create a Store File Info 149 * @param conf the {@link Configuration} to use 150 * @param fs The current file system to use. 151 * @param fileStatus The {@link FileStatus} of the file 152 */ 153 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus) 154 throws IOException { 155 this(conf, fs, fileStatus.getPath()); 156 } 157 158 /** 159 * Create a Store File Info from an HFileLink 160 * @param conf The {@link Configuration} to use 161 * @param fs The current file system to use 162 * @param fileStatus The {@link FileStatus} of the file 163 */ 164 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 165 final HFileLink link) { 166 this.fs = fs; 167 this.conf = conf; 168 // initialPath can be null only if we get a link. 169 this.initialPath = (fileStatus == null) ? null : fileStatus.getPath(); 170 // HFileLink 171 this.reference = null; 172 this.link = link; 173 } 174 175 /** 176 * Create a Store File Info from an HFileLink 177 * @param conf The {@link Configuration} to use 178 * @param fs The current file system to use 179 * @param fileStatus The {@link FileStatus} of the file 180 * @param reference The reference instance 181 */ 182 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 183 final Reference reference) { 184 this.fs = fs; 185 this.conf = conf; 186 this.initialPath = fileStatus.getPath(); 187 this.createdTimestamp = fileStatus.getModificationTime(); 188 this.reference = reference; 189 this.link = null; 190 } 191 192 /** 193 * Create a Store File Info from an HFileLink and a Reference 194 * @param conf The {@link Configuration} to use 195 * @param fs The current file system to use 196 * @param fileStatus The {@link FileStatus} of the file 197 * @param reference The reference instance 198 * @param link The link instance 199 */ 200 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 201 final Reference reference, final HFileLink link) { 202 this.fs = fs; 203 this.conf = conf; 204 this.initialPath = fileStatus.getPath(); 205 this.createdTimestamp = fileStatus.getModificationTime(); 206 this.reference = reference; 207 this.link = link; 208 } 209 210 /** 211 * Sets the region coprocessor env. 212 * @param coprocessorHost 213 */ 214 public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) { 215 this.coprocessorHost = coprocessorHost; 216 } 217 218 /* 219 * @return the Reference object associated to this StoreFileInfo. 220 * null if the StoreFile is not a reference. 221 */ 222 public Reference getReference() { 223 return this.reference; 224 } 225 226 /** @return True if the store file is a Reference */ 227 public boolean isReference() { 228 return this.reference != null; 229 } 230 231 /** @return True if the store file is a top Reference */ 232 public boolean isTopReference() { 233 return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion()); 234 } 235 236 /** @return True if the store file is a link */ 237 public boolean isLink() { 238 return this.link != null && this.reference == null; 239 } 240 241 /** @return the HDFS block distribution */ 242 public HDFSBlocksDistribution getHDFSBlockDistribution() { 243 return this.hdfsBlocksDistribution; 244 } 245 246 /** 247 * Open a Reader for the StoreFile 248 * @param fs The current file system to use. 249 * @param cacheConf The cache configuration and block cache reference. 250 * @return The StoreFile.Reader for the file 251 */ 252 public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind, 253 long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared) 254 throws IOException { 255 FSDataInputStreamWrapper in; 256 FileStatus status; 257 258 final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction(); 259 if (this.link != null) { 260 // HFileLink 261 in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead); 262 status = this.link.getFileStatus(fs); 263 } else if (this.reference != null) { 264 // HFile Reference 265 Path referencePath = getReferredToFile(this.getPath()); 266 in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead); 267 status = fs.getFileStatus(referencePath); 268 } else { 269 in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead); 270 status = fs.getFileStatus(initialPath); 271 } 272 long length = status.getLen(); 273 hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs); 274 275 StoreFileReader reader = null; 276 if (this.coprocessorHost != null) { 277 reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length, 278 cacheConf, reference); 279 } 280 if (reader == null) { 281 if (this.reference != null) { 282 reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference, 283 isPrimaryReplicaStoreFile, refCount, shared, conf); 284 } else { 285 reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf, 286 isPrimaryReplicaStoreFile, refCount, shared, conf); 287 } 288 } 289 if (this.coprocessorHost != null) { 290 reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length, 291 cacheConf, reference, reader); 292 } 293 return reader; 294 } 295 296 /** 297 * Compute the HDFS Block Distribution for this StoreFile 298 */ 299 public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) 300 throws IOException { 301 // guard against the case where we get the FileStatus from link, but by the time we 302 // call compute the file is moved again 303 if (this.link != null) { 304 FileNotFoundException exToThrow = null; 305 for (int i = 0; i < this.link.getLocations().length; i++) { 306 try { 307 return computeHDFSBlocksDistributionInternal(fs); 308 } catch (FileNotFoundException ex) { 309 // try the other location 310 exToThrow = ex; 311 } 312 } 313 throw exToThrow; 314 } else { 315 return computeHDFSBlocksDistributionInternal(fs); 316 } 317 } 318 319 private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs) 320 throws IOException { 321 FileStatus status = getReferencedFileStatus(fs); 322 if (this.reference != null) { 323 return computeRefFileHDFSBlockDistribution(fs, reference, status); 324 } else { 325 return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()); 326 } 327 } 328 329 /** 330 * Get the {@link FileStatus} of the file referenced by this StoreFileInfo 331 * @param fs The current file system to use. 332 * @return The {@link FileStatus} of the file referenced by this StoreFileInfo 333 */ 334 public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException { 335 FileStatus status; 336 if (this.reference != null) { 337 if (this.link != null) { 338 FileNotFoundException exToThrow = null; 339 for (int i = 0; i < this.link.getLocations().length; i++) { 340 // HFileLink Reference 341 try { 342 return link.getFileStatus(fs); 343 } catch (FileNotFoundException ex) { 344 // try the other location 345 exToThrow = ex; 346 } 347 } 348 throw exToThrow; 349 } else { 350 // HFile Reference 351 Path referencePath = getReferredToFile(this.getPath()); 352 status = fs.getFileStatus(referencePath); 353 } 354 } else { 355 if (this.link != null) { 356 FileNotFoundException exToThrow = null; 357 for (int i = 0; i < this.link.getLocations().length; i++) { 358 // HFileLink 359 try { 360 return link.getFileStatus(fs); 361 } catch (FileNotFoundException ex) { 362 // try the other location 363 exToThrow = ex; 364 } 365 } 366 throw exToThrow; 367 } else { 368 status = fs.getFileStatus(initialPath); 369 } 370 } 371 return status; 372 } 373 374 /** @return The {@link Path} of the file */ 375 public Path getPath() { 376 return initialPath; 377 } 378 379 /** @return The {@link FileStatus} of the file */ 380 public FileStatus getFileStatus() throws IOException { 381 return getReferencedFileStatus(fs); 382 } 383 384 /** @return Get the modification time of the file. */ 385 public long getModificationTime() throws IOException { 386 return getFileStatus().getModificationTime(); 387 } 388 389 @Override 390 public String toString() { 391 return this.getPath() + 392 (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : ""); 393 } 394 395 /** 396 * @param path Path to check. 397 * @return True if the path has format of a HFile. 398 */ 399 public static boolean isHFile(final Path path) { 400 return isHFile(path.getName()); 401 } 402 403 public static boolean isHFile(final String fileName) { 404 Matcher m = HFILE_NAME_PATTERN.matcher(fileName); 405 return m.matches() && m.groupCount() > 0; 406 } 407 408 /** 409 * @param path Path to check. 410 * @return True if the path has format of a del file. 411 */ 412 public static boolean isDelFile(final Path path) { 413 return isDelFile(path.getName()); 414 } 415 416 /** 417 * @param fileName Sting version of path to validate. 418 * @return True if the file name has format of a del file. 419 */ 420 public static boolean isDelFile(final String fileName) { 421 Matcher m = DELFILE_NAME_PATTERN.matcher(fileName); 422 return m.matches() && m.groupCount() > 0; 423 } 424 425 /** 426 * @param path Path to check. 427 * @return True if the path has format of a HStoreFile reference. 428 */ 429 public static boolean isReference(final Path path) { 430 return isReference(path.getName()); 431 } 432 433 /** 434 * @param name file name to check. 435 * @return True if the path has format of a HStoreFile reference. 436 */ 437 public static boolean isReference(final String name) { 438 Matcher m = REF_NAME_PATTERN.matcher(name); 439 return m.matches() && m.groupCount() > 1; 440 } 441 442 /** 443 * @return timestamp when this file was created (as returned by filesystem) 444 */ 445 public long getCreatedTimestamp() { 446 return createdTimestamp; 447 } 448 449 /* 450 * Return path to the file referred to by a Reference. Presumes a directory 451 * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>. 452 * @param p Path to a Reference file. 453 * @return Calculated path to parent region file. 454 * @throws IllegalArgumentException when path regex fails to match. 455 */ 456 public static Path getReferredToFile(final Path p) { 457 Matcher m = REF_NAME_PATTERN.matcher(p.getName()); 458 if (m == null || !m.matches()) { 459 LOG.warn("Failed match of store file name " + p.toString()); 460 throw new IllegalArgumentException("Failed match of store file name " + 461 p.toString()); 462 } 463 464 // Other region name is suffix on the passed Reference file name 465 String otherRegion = m.group(2); 466 // Tabledir is up two directories from where Reference was written. 467 Path tableDir = p.getParent().getParent().getParent(); 468 String nameStrippedOfSuffix = m.group(1); 469 if (LOG.isTraceEnabled()) { 470 LOG.trace("reference '" + p + "' to region=" + otherRegion 471 + " hfile=" + nameStrippedOfSuffix); 472 } 473 474 // Build up new path with the referenced region in place of our current 475 // region in the reference path. Also strip regionname suffix from name. 476 return new Path(new Path(new Path(tableDir, otherRegion), 477 p.getParent().getName()), nameStrippedOfSuffix); 478 } 479 480 /** 481 * Validate the store file name. 482 * @param fileName name of the file to validate 483 * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise 484 */ 485 public static boolean validateStoreFileName(final String fileName) { 486 if (HFileLink.isHFileLink(fileName) || isReference(fileName)) 487 return(true); 488 return !fileName.contains("-"); 489 } 490 491 /** 492 * Return if the specified file is a valid store file or not. 493 * @param fileStatus The {@link FileStatus} of the file 494 * @return <tt>true</tt> if the file is valid 495 */ 496 public static boolean isValid(final FileStatus fileStatus) 497 throws IOException { 498 final Path p = fileStatus.getPath(); 499 500 if (fileStatus.isDirectory()) 501 return false; 502 503 // Check for empty hfile. Should never be the case but can happen 504 // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646 505 // NOTE: that the HFileLink is just a name, so it's an empty file. 506 if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) { 507 LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?"); 508 return false; 509 } 510 511 return validateStoreFileName(p.getName()); 512 } 513 514 /** 515 * helper function to compute HDFS blocks distribution of a given reference 516 * file.For reference file, we don't compute the exact value. We use some 517 * estimate instead given it might be good enough. we assume bottom part 518 * takes the first half of reference file, top part takes the second half 519 * of the reference file. This is just estimate, given 520 * midkey ofregion != midkey of HFile, also the number and size of keys vary. 521 * If this estimate isn't good enough, we can improve it later. 522 * @param fs The FileSystem 523 * @param reference The reference 524 * @param status The reference FileStatus 525 * @return HDFS blocks distribution 526 */ 527 private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( 528 final FileSystem fs, final Reference reference, final FileStatus status) 529 throws IOException { 530 if (status == null) { 531 return null; 532 } 533 534 long start = 0; 535 long length = 0; 536 537 if (Reference.isTopFileRegion(reference.getFileRegion())) { 538 start = status.getLen()/2; 539 length = status.getLen() - status.getLen()/2; 540 } else { 541 start = 0; 542 length = status.getLen()/2; 543 } 544 return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); 545 } 546 547 @Override 548 public boolean equals(Object that) { 549 if (this == that) return true; 550 if (that == null) return false; 551 552 if (!(that instanceof StoreFileInfo)) return false; 553 554 StoreFileInfo o = (StoreFileInfo)that; 555 if (initialPath != null && o.initialPath == null) return false; 556 if (initialPath == null && o.initialPath != null) return false; 557 if (initialPath != o.initialPath && initialPath != null 558 && !initialPath.equals(o.initialPath)) return false; 559 560 if (reference != null && o.reference == null) return false; 561 if (reference == null && o.reference != null) return false; 562 if (reference != o.reference && reference != null 563 && !reference.equals(o.reference)) return false; 564 565 if (link != null && o.link == null) return false; 566 if (link == null && o.link != null) return false; 567 if (link != o.link && link != null && !link.equals(o.link)) return false; 568 569 return true; 570 }; 571 572 573 @Override 574 public int hashCode() { 575 int hash = 17; 576 hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode()); 577 hash = hash * 31 + ((initialPath == null) ? 0 : initialPath.hashCode()); 578 hash = hash * 31 + ((link == null) ? 0 : link.hashCode()); 579 return hash; 580 } 581 582 /** 583 * Return the active file name that contains the real data. 584 * <p> 585 * For referenced hfile, we will return the name of the reference file as it will be used to 586 * construct the StoreFileReader. And for linked hfile, we will return the name of the file being 587 * linked. 588 */ 589 public String getActiveFileName() { 590 if (reference != null || link == null) { 591 return initialPath.getName(); 592 } else { 593 return HFileLink.getReferencedHFileName(initialPath.getName()); 594 } 595 } 596}