001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.master; 020 021import java.io.FileNotFoundException; 022import java.io.IOException; 023import java.util.Comparator; 024import java.util.HashSet; 025import java.util.Map; 026import java.util.TreeMap; 027import java.util.concurrent.atomic.AtomicBoolean; 028import java.util.concurrent.atomic.AtomicInteger; 029 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HConstants; 033import org.apache.hadoop.hbase.MetaTableAccessor; 034import org.apache.hadoop.hbase.ScheduledChore; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 037import org.apache.hadoop.hbase.client.Connection; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.client.Result; 040import org.apache.hadoop.hbase.client.TableDescriptor; 041import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 042import org.apache.hadoop.hbase.master.assignment.GCMergedRegionsProcedure; 043import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; 044import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 045import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 046import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.FSUtils; 049import org.apache.hadoop.hbase.util.Pair; 050import org.apache.hadoop.hbase.util.PairOfSameType; 051import org.apache.hadoop.hbase.util.Threads; 052import org.apache.hadoop.hbase.util.Triple; 053import org.apache.yetus.audience.InterfaceAudience; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057/** 058 * A janitor for the catalog tables. Scans the <code>hbase:meta</code> catalog 059 * table on a period looking for unused regions to garbage collect. 060 */ 061@InterfaceAudience.Private 062public class CatalogJanitor extends ScheduledChore { 063 private static final Logger LOG = LoggerFactory.getLogger(CatalogJanitor.class.getName()); 064 065 private final AtomicBoolean alreadyRunning = new AtomicBoolean(false); 066 private final AtomicBoolean enabled = new AtomicBoolean(true); 067 private final MasterServices services; 068 private final Connection connection; 069 // PID of the last Procedure launched herein. Keep around for Tests. 070 071 CatalogJanitor(final MasterServices services) { 072 super("CatalogJanitor-" + services.getServerName().toShortString(), services, 073 services.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000)); 074 this.services = services; 075 this.connection = services.getConnection(); 076 } 077 078 @Override 079 protected boolean initialChore() { 080 try { 081 if (this.enabled.get()) scan(); 082 } catch (IOException e) { 083 LOG.warn("Failed initial scan of catalog table", e); 084 return false; 085 } 086 return true; 087 } 088 089 /** 090 * @param enabled 091 */ 092 public boolean setEnabled(final boolean enabled) { 093 boolean alreadyEnabled = this.enabled.getAndSet(enabled); 094 // If disabling is requested on an already enabled chore, we could have an active 095 // scan still going on, callers might not be aware of that and do further action thinkng 096 // that no action would be from this chore. In this case, the right action is to wait for 097 // the active scan to complete before exiting this function. 098 if (!enabled && alreadyEnabled) { 099 while (alreadyRunning.get()) { 100 Threads.sleepWithoutInterrupt(100); 101 } 102 } 103 return alreadyEnabled; 104 } 105 106 boolean getEnabled() { 107 return this.enabled.get(); 108 } 109 110 @Override 111 protected void chore() { 112 try { 113 AssignmentManager am = this.services.getAssignmentManager(); 114 if (this.enabled.get() && !this.services.isInMaintenanceMode() && 115 !this.services.getServerManager().isClusterShutdown() && am != null && 116 am.isMetaLoaded() && !am.hasRegionsInTransition()) { 117 scan(); 118 } else { 119 LOG.warn("CatalogJanitor is disabled! Enabled=" + this.enabled.get() + 120 ", maintenanceMode=" + this.services.isInMaintenanceMode() + ", am=" + am + 121 ", metaLoaded=" + (am != null && am.isMetaLoaded()) + ", hasRIT=" + 122 (am != null && am.hasRegionsInTransition()) + " clusterShutDown=" + this.services 123 .getServerManager().isClusterShutdown()); 124 } 125 } catch (IOException e) { 126 LOG.warn("Failed scan of catalog table", e); 127 } 128 } 129 130 /** 131 * Scans hbase:meta and returns a number of scanned rows, and a map of merged 132 * regions, and an ordered map of split parents. 133 * @return triple of scanned rows, map of merged regions and map of split 134 * parent regioninfos 135 * @throws IOException 136 */ 137 Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>> 138 getMergedRegionsAndSplitParents() throws IOException { 139 return getMergedRegionsAndSplitParents(null); 140 } 141 142 /** 143 * Scans hbase:meta and returns a number of scanned rows, and a map of merged 144 * regions, and an ordered map of split parents. if the given table name is 145 * null, return merged regions and split parents of all tables, else only the 146 * specified table 147 * @param tableName null represents all tables 148 * @return triple of scanned rows, and map of merged regions, and map of split 149 * parent regioninfos 150 * @throws IOException 151 */ 152 Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>> 153 getMergedRegionsAndSplitParents(final TableName tableName) throws IOException { 154 final boolean isTableSpecified = (tableName != null); 155 // TODO: Only works with single hbase:meta region currently. Fix. 156 final AtomicInteger count = new AtomicInteger(0); 157 // Keep Map of found split parents. There are candidates for cleanup. 158 // Use a comparator that has split parents come before its daughters. 159 final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator()); 160 final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); 161 // This visitor collects split parents and counts rows in the hbase:meta table 162 163 MetaTableAccessor.Visitor visitor = new MetaTableAccessor.Visitor() { 164 @Override 165 public boolean visit(Result r) throws IOException { 166 if (r == null || r.isEmpty()) return true; 167 count.incrementAndGet(); 168 RegionInfo info = MetaTableAccessor.getRegionInfo(r); 169 if (info == null) return true; // Keep scanning 170 if (isTableSpecified 171 && info.getTable().compareTo(tableName) > 0) { 172 // Another table, stop scanning 173 return false; 174 } 175 if (LOG.isTraceEnabled()) LOG.trace("" + info + " IS-SPLIT_PARENT=" + info.isSplitParent()); 176 if (info.isSplitParent()) splitParents.put(info, r); 177 if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) { 178 mergedRegions.put(info, r); 179 } 180 // Returning true means "keep scanning" 181 return true; 182 } 183 }; 184 185 // Run full scan of hbase:meta catalog table passing in our custom visitor with 186 // the start row 187 MetaTableAccessor.scanMetaForTableRegions(this.connection, visitor, tableName); 188 189 return new Triple<>(count.get(), mergedRegions, splitParents); 190 } 191 192 /** 193 * If merged region no longer holds reference to the merge regions, archive 194 * merge region on hdfs and perform deleting references in hbase:meta 195 * @param mergedRegion 196 * @return true if we delete references in merged region on hbase:meta and archive 197 * the files on the file system 198 * @throws IOException 199 */ 200 boolean cleanMergeRegion(final RegionInfo mergedRegion, 201 final RegionInfo regionA, final RegionInfo regionB) throws IOException { 202 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 203 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 204 Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable()); 205 TableDescriptor htd = getTableDescriptor(mergedRegion.getTable()); 206 HRegionFileSystem regionFs = null; 207 try { 208 regionFs = HRegionFileSystem.openRegionFromFileSystem( 209 this.services.getConfiguration(), fs, tabledir, mergedRegion, true); 210 } catch (IOException e) { 211 LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName()); 212 } 213 if (regionFs == null || !regionFs.hasReferences(htd)) { 214 LOG.debug("Deleting region " + regionA.getShortNameToLog() + " and " 215 + regionB.getShortNameToLog() 216 + " from fs because merged region no longer holds references"); 217 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 218 pe.submitProcedure(new GCMergedRegionsProcedure(pe.getEnvironment(), 219 mergedRegion, regionA, regionB)); 220 // Remove from in-memory states 221 this.services.getAssignmentManager().getRegionStates().deleteRegion(regionA); 222 this.services.getAssignmentManager().getRegionStates().deleteRegion(regionB); 223 this.services.getServerManager().removeRegion(regionA); 224 this.services.getServerManager().removeRegion(regionB); 225 return true; 226 } 227 return false; 228 } 229 230 /** 231 * Run janitorial scan of catalog <code>hbase:meta</code> table looking for 232 * garbage to collect. 233 * @return number of archiving jobs started. 234 * @throws IOException 235 */ 236 int scan() throws IOException { 237 int result = 0; 238 239 try { 240 if (!alreadyRunning.compareAndSet(false, true)) { 241 LOG.debug("CatalogJanitor already running"); 242 return result; 243 } 244 Triple<Integer, Map<RegionInfo, Result>, Map<RegionInfo, Result>> scanTriple = 245 getMergedRegionsAndSplitParents(); 246 /** 247 * clean merge regions first 248 */ 249 Map<RegionInfo, Result> mergedRegions = scanTriple.getSecond(); 250 for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) { 251 if (this.services.isInMaintenanceMode()) { 252 // Stop cleaning if the master is in maintenance mode 253 break; 254 } 255 256 PairOfSameType<RegionInfo> p = MetaTableAccessor.getMergeRegions(e.getValue()); 257 RegionInfo regionA = p.getFirst(); 258 RegionInfo regionB = p.getSecond(); 259 if (regionA == null || regionB == null) { 260 LOG.warn("Unexpected references regionA=" 261 + (regionA == null ? "null" : regionA.getShortNameToLog()) 262 + ",regionB=" 263 + (regionB == null ? "null" : regionB.getShortNameToLog()) 264 + " in merged region " + e.getKey().getShortNameToLog()); 265 } else { 266 if (cleanMergeRegion(e.getKey(), regionA, regionB)) { 267 result++; 268 } 269 } 270 } 271 /** 272 * clean split parents 273 */ 274 Map<RegionInfo, Result> splitParents = scanTriple.getThird(); 275 276 // Now work on our list of found parents. See if any we can clean up. 277 // regions whose parents are still around 278 HashSet<String> parentNotCleaned = new HashSet<>(); 279 for (Map.Entry<RegionInfo, Result> e : splitParents.entrySet()) { 280 if (this.services.isInMaintenanceMode()) { 281 // Stop cleaning if the master is in maintenance mode 282 break; 283 } 284 285 if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && 286 cleanParent(e.getKey(), e.getValue())) { 287 result++; 288 } else { 289 // We could not clean the parent, so it's daughters should not be 290 // cleaned either (HBASE-6160) 291 PairOfSameType<RegionInfo> daughters = 292 MetaTableAccessor.getDaughterRegions(e.getValue()); 293 parentNotCleaned.add(daughters.getFirst().getEncodedName()); 294 parentNotCleaned.add(daughters.getSecond().getEncodedName()); 295 } 296 } 297 return result; 298 } finally { 299 alreadyRunning.set(false); 300 } 301 } 302 303 /** 304 * Compare HRegionInfos in a way that has split parents sort BEFORE their 305 * daughters. 306 */ 307 static class SplitParentFirstComparator implements Comparator<RegionInfo> { 308 Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator(); 309 @Override 310 public int compare(RegionInfo left, RegionInfo right) { 311 // This comparator differs from the one RegionInfo in that it sorts 312 // parent before daughters. 313 if (left == null) return -1; 314 if (right == null) return 1; 315 // Same table name. 316 int result = left.getTable().compareTo(right.getTable()); 317 if (result != 0) return result; 318 // Compare start keys. 319 result = Bytes.compareTo(left.getStartKey(), right.getStartKey()); 320 if (result != 0) return result; 321 // Compare end keys, but flip the operands so parent comes first 322 result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey()); 323 324 return result; 325 } 326 } 327 328 /** 329 * If daughters no longer hold reference to the parents, delete the parent. 330 * @param parent RegionInfo of split offlined parent 331 * @param rowContent Content of <code>parent</code> row in 332 * <code>metaRegionName</code> 333 * @return True if we removed <code>parent</code> from meta table and from 334 * the filesystem. 335 * @throws IOException 336 */ 337 boolean cleanParent(final RegionInfo parent, Result rowContent) 338 throws IOException { 339 // Check whether it is a merged region and not clean reference 340 // No necessary to check MERGEB_QUALIFIER because these two qualifiers will 341 // be inserted/deleted together 342 if (rowContent.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) { 343 // wait cleaning merge region first 344 return false; 345 } 346 // Run checks on each daughter split. 347 PairOfSameType<RegionInfo> daughters = MetaTableAccessor.getDaughterRegions(rowContent); 348 Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst()); 349 Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond()); 350 if (hasNoReferences(a) && hasNoReferences(b)) { 351 String daughterA = daughters.getFirst() != null? 352 daughters.getFirst().getShortNameToLog(): "null"; 353 String daughterB = daughters.getSecond() != null? 354 daughters.getSecond().getShortNameToLog(): "null"; 355 LOG.debug("Deleting region " + parent.getShortNameToLog() + 356 " because daughters -- " + daughterA + ", " + daughterB + 357 " -- no longer hold references"); 358 ProcedureExecutor<MasterProcedureEnv> pe = this.services.getMasterProcedureExecutor(); 359 pe.submitProcedure(new GCRegionProcedure(pe.getEnvironment(), parent)); 360 // Remove from in-memory states 361 this.services.getAssignmentManager().getRegionStates().deleteRegion(parent); 362 this.services.getServerManager().removeRegion(parent); 363 return true; 364 } 365 return false; 366 } 367 368 /** 369 * @param p A pair where the first boolean says whether or not the daughter 370 * region directory exists in the filesystem and then the second boolean says 371 * whether the daughter has references to the parent. 372 * @return True the passed <code>p</code> signifies no references. 373 */ 374 private boolean hasNoReferences(final Pair<Boolean, Boolean> p) { 375 return !p.getFirst() || !p.getSecond(); 376 } 377 378 /** 379 * Checks if a daughter region -- either splitA or splitB -- still holds 380 * references to parent. 381 * @param parent Parent region 382 * @param daughter Daughter region 383 * @return A pair where the first boolean says whether or not the daughter 384 * region directory exists in the filesystem and then the second boolean says 385 * whether the daughter has references to the parent. 386 * @throws IOException 387 */ 388 Pair<Boolean, Boolean> checkDaughterInFs(final RegionInfo parent, final RegionInfo daughter) 389 throws IOException { 390 if (daughter == null) { 391 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 392 } 393 394 FileSystem fs = this.services.getMasterFileSystem().getFileSystem(); 395 Path rootdir = this.services.getMasterFileSystem().getRootDir(); 396 Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable()); 397 398 Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName()); 399 400 HRegionFileSystem regionFs = null; 401 402 try { 403 if (!FSUtils.isExists(fs, daughterRegionDir)) { 404 return new Pair<>(Boolean.FALSE, Boolean.FALSE); 405 } 406 } catch (IOException ioe) { 407 LOG.error("Error trying to determine if daughter region exists, " + 408 "assuming exists and has references", ioe); 409 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 410 } 411 412 boolean references = false; 413 TableDescriptor parentDescriptor = getTableDescriptor(parent.getTable()); 414 try { 415 regionFs = HRegionFileSystem.openRegionFromFileSystem( 416 this.services.getConfiguration(), fs, tabledir, daughter, true); 417 418 for (ColumnFamilyDescriptor family: parentDescriptor.getColumnFamilies()) { 419 if ((references = regionFs.hasReferences(family.getNameAsString()))) { 420 break; 421 } 422 } 423 } catch (IOException e) { 424 LOG.error("Error trying to determine referenced files from : " + daughter.getEncodedName() 425 + ", to: " + parent.getEncodedName() + " assuming has references", e); 426 return new Pair<>(Boolean.TRUE, Boolean.TRUE); 427 } 428 return new Pair<>(Boolean.TRUE, Boolean.valueOf(references)); 429 } 430 431 private TableDescriptor getTableDescriptor(final TableName tableName) 432 throws FileNotFoundException, IOException { 433 return this.services.getTableDescriptors().get(tableName); 434 } 435 436 /** 437 * Checks if the specified region has merge qualifiers, if so, try to clean 438 * them 439 * @param region 440 * @return true if the specified region doesn't have merge qualifier now 441 * @throws IOException 442 */ 443 public boolean cleanMergeQualifier(final RegionInfo region) 444 throws IOException { 445 // Get merge regions if it is a merged region and already has merge 446 // qualifier 447 Pair<RegionInfo, RegionInfo> mergeRegions = MetaTableAccessor 448 .getRegionsFromMergeQualifier(this.services.getConnection(), 449 region.getRegionName()); 450 if (mergeRegions == null 451 || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) { 452 // It doesn't have merge qualifier, no need to clean 453 return true; 454 } 455 // It shouldn't happen, we must insert/delete these two qualifiers together 456 if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) { 457 LOG.error("Merged region " + region.getRegionNameAsString() 458 + " has only one merge qualifier in META."); 459 return false; 460 } 461 return cleanMergeRegion(region, mergeRegions.getFirst(), 462 mergeRegions.getSecond()); 463 } 464}