001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import edu.umd.cs.findbugs.annotations.Nullable; 022import java.io.ByteArrayOutputStream; 023import java.io.Closeable; 024import java.io.IOException; 025import java.io.InterruptedIOException; 026import java.util.ArrayList; 027import java.util.Arrays; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.NavigableMap; 034import java.util.Set; 035import java.util.SortedMap; 036import java.util.TreeMap; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039import java.util.stream.Collectors; 040import org.apache.hadoop.conf.Configuration; 041import org.apache.hadoop.hbase.Cell.Type; 042import org.apache.hadoop.hbase.client.Connection; 043import org.apache.hadoop.hbase.client.ConnectionFactory; 044import org.apache.hadoop.hbase.client.Consistency; 045import org.apache.hadoop.hbase.client.Delete; 046import org.apache.hadoop.hbase.client.Get; 047import org.apache.hadoop.hbase.client.Mutation; 048import 
org.apache.hadoop.hbase.client.Put; 049import org.apache.hadoop.hbase.client.RegionInfo; 050import org.apache.hadoop.hbase.client.RegionInfoBuilder; 051import org.apache.hadoop.hbase.client.RegionLocator; 052import org.apache.hadoop.hbase.client.RegionReplicaUtil; 053import org.apache.hadoop.hbase.client.RegionServerCallable; 054import org.apache.hadoop.hbase.client.Result; 055import org.apache.hadoop.hbase.client.ResultScanner; 056import org.apache.hadoop.hbase.client.Scan; 057import org.apache.hadoop.hbase.client.Table; 058import org.apache.hadoop.hbase.client.TableState; 059import org.apache.hadoop.hbase.exceptions.DeserializationException; 060import org.apache.hadoop.hbase.filter.Filter; 061import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 062import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; 063import org.apache.hadoop.hbase.master.RegionState; 064import org.apache.hadoop.hbase.master.RegionState.State; 065import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 066import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; 067import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier; 068import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType; 069import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos; 070import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 071import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 072import org.apache.hadoop.hbase.util.Bytes; 073import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 074import org.apache.hadoop.hbase.util.ExceptionUtil; 075import org.apache.hadoop.hbase.util.Pair; 076import org.apache.hadoop.hbase.util.PairOfSameType; 077import org.apache.yetus.audience.InterfaceAudience; 078import org.slf4j.Logger; 079import org.slf4j.LoggerFactory; 080 081import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 082 083/** 084 
* <p>
 * Read/write operations on region and assignment information stored in <code>hbase:meta</code>.
 * </p>
 * <p>
 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is
 * that when used on the client side (like from HBaseAdmin), we want to use a short-lived
 * connection (opened before each operation, closed right after), while when used on HM or HRS
 * (like in AssignmentManager) we want a permanent connection.
 * </p>
 * <p>
 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
 * range (table, startKey, endKey). For every range, there will be at least one HRI defined, which
 * is called the default replica.
 * </p>
 * <p>
 * Meta layout (as of 0.98 + HBASE-10070) is like:
 *
 * <pre>
 * For each table there is a single row in column family 'table' formatted:
 * &lt;tableName&gt; including namespace and columns are:
 * table: state =&gt; contains table state
 *
 * For each table range, there is a single row, formatted like:
 * &lt;tableName&gt;,&lt;startKey&gt;,&lt;regionId&gt;,&lt;encodedRegionName&gt;.
 * This row corresponds to the regionName of the default region replica.
109 * Columns are: 110 * info:regioninfo => contains serialized HRI for the default region replica 111 * info:server => contains hostname:port (in string form) for the server hosting 112 * the default regionInfo replica 113 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting 114 * the regionInfo replica with replicaId 115 * info:serverstartcode => contains server start code (in binary long form) for the server 116 * hosting the default regionInfo replica 117 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for 118 * the server hosting the regionInfo replica with 119 * replicaId 120 * info:seqnumDuringOpen => contains seqNum (in binary long form) for the region at the time 121 * the server opened the region with default replicaId 122 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region 123 * at the time the server opened the region with 124 * replicaId 125 * info:splitA => contains a serialized HRI for the first daughter region if the 126 * region is split 127 * info:splitB => contains a serialized HRI for the second daughter region if the 128 * region is split 129 * info:mergeA => contains a serialized HRI for the first parent region if the 130 * region is the result of a merge 131 * info:mergeB => contains a serialized HRI for the second parent region if the 132 * region is the result of a merge 133 * </pre> 134 * </p> 135 * <p> 136 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not 137 * leak out of it (through Result objects, etc) 138 * </p> 139 */ 140@InterfaceAudience.Private 141public class MetaTableAccessor { 142 143 private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class); 144 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 145 146 @VisibleForTesting 147 public static final byte[] REPLICATION_PARENT_QUALIFIER = 
Bytes.toBytes("parent"); 148 149 private static final byte ESCAPE_BYTE = (byte) 0xFF; 150 151 private static final byte SEPARATED_BYTE = 0x00; 152 153 /** 154 * Lists all of the table regions currently in META. 155 * Deprecated, keep there until some test use this. 156 * @param connection what we will use 157 * @param tableName table to list 158 * @return Map of all user-space regions to servers 159 * @deprecated use {@link #getTableRegionsAndLocations}, region can have multiple locations 160 */ 161 @Deprecated 162 public static NavigableMap<RegionInfo, ServerName> allTableRegions( 163 Connection connection, final TableName tableName) throws IOException { 164 final NavigableMap<RegionInfo, ServerName> regions = new TreeMap<>(); 165 Visitor visitor = new TableVisitorBase(tableName) { 166 @Override 167 public boolean visitInternal(Result result) throws IOException { 168 RegionLocations locations = getRegionLocations(result); 169 if (locations == null) return true; 170 for (HRegionLocation loc : locations.getRegionLocations()) { 171 if (loc != null) { 172 RegionInfo regionInfo = loc.getRegionInfo(); 173 regions.put(regionInfo, loc.getServerName()); 174 } 175 } 176 return true; 177 } 178 }; 179 scanMetaForTableRegions(connection, visitor, tableName); 180 return regions; 181 } 182 183 @InterfaceAudience.Private 184 public enum QueryType { 185 ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY), 186 REGION(HConstants.CATALOG_FAMILY), 187 TABLE(HConstants.TABLE_FAMILY), 188 REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY); 189 190 private final byte[][] families; 191 192 QueryType(byte[]... families) { 193 this.families = families; 194 } 195 196 byte[][] getFamilies() { 197 return this.families; 198 } 199 } 200 201 /** The delimiter for meta columns for replicaIds > 0 */ 202 protected static final char META_REPLICA_ID_DELIMITER = '_'; 203 204 /** A regex for parsing server columns from meta. 
See above javadoc for meta layout */ 205 private static final Pattern SERVER_COLUMN_PATTERN 206 = Pattern.compile("^server(_[0-9a-fA-F]{4})?$"); 207 208 //////////////////////// 209 // Reading operations // 210 //////////////////////// 211 212 /** 213 * Performs a full scan of <code>hbase:meta</code> for regions. 214 * @param connection connection we're using 215 * @param visitor Visitor invoked against each row in regions family. 216 */ 217 public static void fullScanRegions(Connection connection, 218 final Visitor visitor) 219 throws IOException { 220 scanMeta(connection, null, null, QueryType.REGION, visitor); 221 } 222 223 /** 224 * Performs a full scan of <code>hbase:meta</code> for regions. 225 * @param connection connection we're using 226 */ 227 public static List<Result> fullScanRegions(Connection connection) 228 throws IOException { 229 return fullScan(connection, QueryType.REGION); 230 } 231 232 /** 233 * Performs a full scan of <code>hbase:meta</code> for tables. 234 * @param connection connection we're using 235 * @param visitor Visitor invoked against each row in tables family. 236 */ 237 public static void fullScanTables(Connection connection, 238 final Visitor visitor) 239 throws IOException { 240 scanMeta(connection, null, null, QueryType.TABLE, visitor); 241 } 242 243 /** 244 * Performs a full scan of <code>hbase:meta</code>. 245 * @param connection connection we're using 246 * @param type scanned part of meta 247 * @return List of {@link Result} 248 */ 249 public static List<Result> fullScan(Connection connection, QueryType type) 250 throws IOException { 251 CollectAllVisitor v = new CollectAllVisitor(); 252 scanMeta(connection, null, null, type, v); 253 return v.getResults(); 254 } 255 256 /** 257 * Callers should call close on the returned {@link Table} instance. 
258 * @param connection connection we're using to access Meta 259 * @return An {@link Table} for <code>hbase:meta</code> 260 */ 261 public static Table getMetaHTable(final Connection connection) 262 throws IOException { 263 // We used to pass whole CatalogTracker in here, now we just pass in Connection 264 if (connection == null) { 265 throw new NullPointerException("No connection"); 266 } else if (connection.isClosed()) { 267 throw new IOException("connection is closed"); 268 } 269 return connection.getTable(TableName.META_TABLE_NAME); 270 } 271 272 /** 273 * @param t Table to use (will be closed when done). 274 * @param g Get to run 275 */ 276 private static Result get(final Table t, final Get g) throws IOException { 277 if (t == null) return null; 278 try { 279 return t.get(g); 280 } finally { 281 t.close(); 282 } 283 } 284 285 /** 286 * Gets the region info and assignment for the specified region. 287 * @param connection connection we're using 288 * @param regionName Region to lookup. 289 * @return Location and RegionInfo for <code>regionName</code> 290 * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead 291 */ 292 @Deprecated 293 public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte [] regionName) 294 throws IOException { 295 HRegionLocation location = getRegionLocation(connection, regionName); 296 return location == null 297 ? 
null 298 : new Pair<>(location.getRegionInfo(), location.getServerName()); 299 } 300 301 /** 302 * Returns the HRegionLocation from meta for the given region 303 * @param connection connection we're using 304 * @param regionName region we're looking for 305 * @return HRegionLocation for the given region 306 */ 307 public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName) 308 throws IOException { 309 byte[] row = regionName; 310 RegionInfo parsedInfo = null; 311 try { 312 parsedInfo = parseRegionInfoFromRegionName(regionName); 313 row = getMetaKeyForRegion(parsedInfo); 314 } catch (Exception parseEx) { 315 // Ignore. This is used with tableName passed as regionName. 316 } 317 Get get = new Get(row); 318 get.addFamily(HConstants.CATALOG_FAMILY); 319 Result r = get(getMetaHTable(connection), get); 320 RegionLocations locations = getRegionLocations(r); 321 return locations == null ? null 322 : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId()); 323 } 324 325 /** 326 * Returns the HRegionLocation from meta for the given region 327 * @param connection connection we're using 328 * @param regionInfo region information 329 * @return HRegionLocation for the given region 330 */ 331 public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo) 332 throws IOException { 333 byte[] row = getMetaKeyForRegion(regionInfo); 334 Get get = new Get(row); 335 get.addFamily(HConstants.CATALOG_FAMILY); 336 Result r = get(getMetaHTable(connection), get); 337 return getRegionLocation(r, regionInfo, regionInfo.getReplicaId()); 338 } 339 340 /** Returns the row key to use for this regionInfo */ 341 public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) { 342 return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName(); 343 } 344 345 /** Returns an HRI parsed from this regionName. 
Not all the fields of the HRI 346 * is stored in the name, so the returned object should only be used for the fields 347 * in the regionName. 348 */ 349 public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { 350 byte[][] fields = RegionInfo.parseRegionName(regionName); 351 long regionId = Long.parseLong(Bytes.toString(fields[2])); 352 int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0; 353 return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])) 354 .setStartKey(fields[1]) 355 .setEndKey(fields[2]) 356 .setSplit(false) 357 .setRegionId(regionId) 358 .setReplicaId(replicaId) 359 .build(); 360 } 361 362 /** 363 * Gets the result in hbase:meta for the specified region. 364 * @param connection connection we're using 365 * @param regionName region we're looking for 366 * @return result of the specified region 367 */ 368 public static Result getRegionResult(Connection connection, 369 byte[] regionName) throws IOException { 370 Get get = new Get(regionName); 371 get.addFamily(HConstants.CATALOG_FAMILY); 372 return get(getMetaHTable(connection), get); 373 } 374 375 /** 376 * Get regions from the merge qualifier of the specified merged region 377 * @return null if it doesn't contain merge qualifier, else two merge regions 378 */ 379 @Nullable 380 public static Pair<RegionInfo, RegionInfo> getRegionsFromMergeQualifier( 381 Connection connection, byte[] regionName) throws IOException { 382 Result result = getRegionResult(connection, regionName); 383 RegionInfo mergeA = getRegionInfo(result, HConstants.MERGEA_QUALIFIER); 384 RegionInfo mergeB = getRegionInfo(result, HConstants.MERGEB_QUALIFIER); 385 if (mergeA == null && mergeB == null) { 386 return null; 387 } 388 return new Pair<>(mergeA, mergeB); 389 } 390 391 /** 392 * Checks if the specified table exists. Looks at the hbase:meta table hosted on 393 * the specified server. 
394 * @param connection connection we're using 395 * @param tableName table to check 396 * @return true if the table exists in meta, false if not 397 */ 398 public static boolean tableExists(Connection connection, 399 final TableName tableName) 400 throws IOException { 401 // Catalog tables always exist. 402 return tableName.equals(TableName.META_TABLE_NAME) 403 || getTableState(connection, tableName) != null; 404 } 405 406 /** 407 * Lists all of the regions currently in META. 408 * 409 * @param connection to connect with 410 * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions, 411 * true and we'll leave out offlined regions from returned list 412 * @return List of all user-space regions. 413 */ 414 @VisibleForTesting 415 public static List<RegionInfo> getAllRegions(Connection connection, 416 boolean excludeOfflinedSplitParents) 417 throws IOException { 418 List<Pair<RegionInfo, ServerName>> result; 419 420 result = getTableRegionsAndLocations(connection, null, 421 excludeOfflinedSplitParents); 422 423 return getListOfRegionInfos(result); 424 425 } 426 427 /** 428 * Gets all of the regions of the specified table. Do not use this method 429 * to get meta table regions, use methods in MetaTableLocator instead. 430 * @param connection connection we're using 431 * @param tableName table we're looking for 432 * @return Ordered list of {@link RegionInfo}. 433 */ 434 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName) 435 throws IOException { 436 return getTableRegions(connection, tableName, false); 437 } 438 439 /** 440 * Gets all of the regions of the specified table. Do not use this method 441 * to get meta table regions, use methods in MetaTableLocator instead. 442 * @param connection connection we're using 443 * @param tableName table we're looking for 444 * @param excludeOfflinedSplitParents If true, do not include offlined split 445 * parents in the return. 
446 * @return Ordered list of {@link RegionInfo}. 447 */ 448 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName, 449 final boolean excludeOfflinedSplitParents) throws IOException { 450 List<Pair<RegionInfo, ServerName>> result = 451 getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents); 452 return getListOfRegionInfos(result); 453 } 454 455 private static List<RegionInfo> getListOfRegionInfos( 456 final List<Pair<RegionInfo, ServerName>> pairs) { 457 if (pairs == null || pairs.isEmpty()) { 458 return Collections.emptyList(); 459 } 460 List<RegionInfo> result = new ArrayList<>(pairs.size()); 461 for (Pair<RegionInfo, ServerName> pair : pairs) { 462 result.add(pair.getFirst()); 463 } 464 return result; 465 } 466 467 /** 468 * @param tableName table we're working with 469 * @return start row for scanning META according to query type 470 */ 471 public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) { 472 if (tableName == null) { 473 return null; 474 } 475 switch (type) { 476 case REGION: 477 byte[] startRow = new byte[tableName.getName().length + 2]; 478 System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length); 479 startRow[startRow.length - 2] = HConstants.DELIMITER; 480 startRow[startRow.length - 1] = HConstants.DELIMITER; 481 return startRow; 482 case ALL: 483 case TABLE: 484 default: 485 return tableName.getName(); 486 } 487 } 488 489 /** 490 * @param tableName table we're working with 491 * @return stop row for scanning META according to query type 492 */ 493 public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) { 494 if (tableName == null) { 495 return null; 496 } 497 final byte[] stopRow; 498 switch (type) { 499 case REGION: 500 stopRow = new byte[tableName.getName().length + 3]; 501 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 502 stopRow[stopRow.length - 3] = ' '; 503 
stopRow[stopRow.length - 2] = HConstants.DELIMITER; 504 stopRow[stopRow.length - 1] = HConstants.DELIMITER; 505 break; 506 case ALL: 507 case TABLE: 508 default: 509 stopRow = new byte[tableName.getName().length + 1]; 510 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 511 stopRow[stopRow.length - 1] = ' '; 512 break; 513 } 514 return stopRow; 515 } 516 517 /** 518 * This method creates a Scan object that will only scan catalog rows that 519 * belong to the specified table. It doesn't specify any columns. 520 * This is a better alternative to just using a start row and scan until 521 * it hits a new table since that requires parsing the HRI to get the table 522 * name. 523 * @param tableName bytes of table's name 524 * @return configured Scan object 525 */ 526 @Deprecated 527 public static Scan getScanForTableName(Connection connection, TableName tableName) { 528 // Start key is just the table name with delimiters 529 byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION); 530 // Stop key appends the smallest possible char to the table name 531 byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION); 532 533 Scan scan = getMetaScan(connection, -1); 534 scan.setStartRow(startKey); 535 scan.setStopRow(stopKey); 536 return scan; 537 } 538 539 private static Scan getMetaScan(Connection connection, int rowUpperLimit) { 540 Scan scan = new Scan(); 541 int scannerCaching = connection.getConfiguration() 542 .getInt(HConstants.HBASE_META_SCANNER_CACHING, 543 HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); 544 if (connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS, 545 HConstants.DEFAULT_USE_META_REPLICAS)) { 546 scan.setConsistency(Consistency.TIMELINE); 547 } 548 if (rowUpperLimit > 0) { 549 scan.setLimit(rowUpperLimit); 550 scan.setReadType(Scan.ReadType.PREAD); 551 } 552 scan.setCaching(scannerCaching); 553 return scan; 554 } 555 /** 556 * Do not use this method to get meta table regions, 
use methods in MetaTableLocator instead. 557 * @param connection connection we're using 558 * @param tableName table we're looking for 559 * @return Return list of regioninfos and server. 560 * @throws IOException 561 */ 562 public static List<Pair<RegionInfo, ServerName>> 563 getTableRegionsAndLocations(Connection connection, TableName tableName) 564 throws IOException { 565 return getTableRegionsAndLocations(connection, tableName, true); 566 } 567 568 /** 569 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 570 * @param connection connection we're using 571 * @param tableName table to work with, can be null for getting all regions 572 * @param excludeOfflinedSplitParents don't return split parents 573 * @return Return list of regioninfos and server addresses. 574 * @throws IOException 575 */ 576 public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations( 577 Connection connection, @Nullable final TableName tableName, 578 final boolean excludeOfflinedSplitParents) throws IOException { 579 if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) { 580 throw new IOException("This method can't be used to locate meta regions;" 581 + " use MetaTableLocator instead"); 582 } 583 // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress 584 CollectingVisitor<Pair<RegionInfo, ServerName>> visitor = 585 new CollectingVisitor<Pair<RegionInfo, ServerName>>() { 586 private RegionLocations current = null; 587 588 @Override 589 public boolean visit(Result r) throws IOException { 590 current = getRegionLocations(r); 591 if (current == null || current.getRegionLocation().getRegion() == null) { 592 LOG.warn("No serialized RegionInfo in " + r); 593 return true; 594 } 595 RegionInfo hri = current.getRegionLocation().getRegion(); 596 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true; 597 // Else call super and add this Result to the collection. 
598 return super.visit(r); 599 } 600 601 @Override 602 void add(Result r) { 603 if (current == null) { 604 return; 605 } 606 for (HRegionLocation loc : current.getRegionLocations()) { 607 if (loc != null) { 608 this.results.add(new Pair<>(loc.getRegion(), loc.getServerName())); 609 } 610 } 611 } 612 }; 613 scanMeta(connection, 614 getTableStartRowForMeta(tableName, QueryType.REGION), 615 getTableStopRowForMeta(tableName, QueryType.REGION), 616 QueryType.REGION, visitor); 617 return visitor.getResults(); 618 } 619 620 /** 621 * @param connection connection we're using 622 * @param serverName server whose regions we're interested in 623 * @return List of user regions installed on this server (does not include 624 * catalog regions). 625 * @throws IOException 626 */ 627 public static NavigableMap<RegionInfo, Result> 628 getServerUserRegions(Connection connection, final ServerName serverName) 629 throws IOException { 630 final NavigableMap<RegionInfo, Result> hris = new TreeMap<>(); 631 // Fill the above hris map with entries from hbase:meta that have the passed 632 // servername. 
633 CollectingVisitor<Result> v = new CollectingVisitor<Result>() { 634 @Override 635 void add(Result r) { 636 if (r == null || r.isEmpty()) return; 637 RegionLocations locations = getRegionLocations(r); 638 if (locations == null) return; 639 for (HRegionLocation loc : locations.getRegionLocations()) { 640 if (loc != null) { 641 if (loc.getServerName() != null && loc.getServerName().equals(serverName)) { 642 hris.put(loc.getRegion(), r); 643 } 644 } 645 } 646 } 647 }; 648 scanMeta(connection, null, null, QueryType.REGION, v); 649 return hris; 650 } 651 652 public static void fullScanMetaAndPrint(Connection connection) 653 throws IOException { 654 Visitor v = new Visitor() { 655 @Override 656 public boolean visit(Result r) throws IOException { 657 if (r == null || r.isEmpty()) return true; 658 LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r); 659 TableState state = getTableState(r); 660 if (state != null) { 661 LOG.info("Table State: " + state); 662 } else { 663 RegionLocations locations = getRegionLocations(r); 664 if (locations == null) return true; 665 for (HRegionLocation loc : locations.getRegionLocations()) { 666 if (loc != null) { 667 LOG.info("fullScanMetaAndPrint.HRI Print= " + loc.getRegion()); 668 } 669 } 670 } 671 return true; 672 } 673 }; 674 scanMeta(connection, null, null, QueryType.ALL, v); 675 } 676 677 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 678 TableName tableName) throws IOException { 679 scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor); 680 } 681 682 public static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows, 683 final Visitor visitor) throws IOException { 684 scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type), 685 type, maxRows, visitor); 686 } 687 688 public static void scanMeta(Connection connection, @Nullable final byte[] startRow, 689 @Nullable final byte[] stopRow, QueryType type, 
final Visitor visitor) throws IOException { 690 scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor); 691 } 692 693 /** 694 * Performs a scan of META table for given table starting from 695 * given row. 696 * 697 * @param connection connection we're using 698 * @param visitor visitor to call 699 * @param tableName table withing we scan 700 * @param row start scan from this row 701 * @param rowLimit max number of rows to return 702 */ 703 public static void scanMeta(Connection connection, final Visitor visitor, 704 final TableName tableName, final byte[] row, final int rowLimit) throws IOException { 705 byte[] startRow = null; 706 byte[] stopRow = null; 707 if (tableName != null) { 708 startRow = getTableStartRowForMeta(tableName, QueryType.REGION); 709 if (row != null) { 710 RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row); 711 startRow = 712 RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false); 713 } 714 stopRow = getTableStopRowForMeta(tableName, QueryType.REGION); 715 } 716 scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor); 717 } 718 719 720 /** 721 * Performs a scan of META table. 722 * @param connection connection we're using 723 * @param startRow Where to start the scan. Pass null if want to begin scan 724 * at first row. 725 * @param stopRow Where to stop the scan. Pass null if want to scan all rows 726 * from the start one 727 * @param type scanned part of meta 728 * @param maxRows maximum rows to return 729 * @param visitor Visitor invoked against each row. 
730 */ 731 public static void scanMeta(Connection connection, @Nullable final byte[] startRow, 732 @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor) 733 throws IOException { 734 scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor); 735 } 736 737 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 738 @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows, 739 final Visitor visitor) throws IOException { 740 int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE; 741 Scan scan = getMetaScan(connection, rowUpperLimit); 742 743 for (byte[] family : type.getFamilies()) { 744 scan.addFamily(family); 745 } 746 if (startRow != null) { 747 scan.withStartRow(startRow); 748 } 749 if (stopRow != null) { 750 scan.withStopRow(stopRow); 751 } 752 if (filter != null) { 753 scan.setFilter(filter); 754 } 755 756 if (LOG.isTraceEnabled()) { 757 LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) + 758 " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit + 759 " with caching=" + scan.getCaching()); 760 } 761 762 int currentRow = 0; 763 try (Table metaTable = getMetaHTable(connection)) { 764 try (ResultScanner scanner = metaTable.getScanner(scan)) { 765 Result data; 766 while ((data = scanner.next()) != null) { 767 if (data.isEmpty()) continue; 768 // Break if visit returns false. 
769 if (!visitor.visit(data)) break; 770 if (++currentRow >= rowUpperLimit) break; 771 } 772 } 773 } 774 if (visitor != null && visitor instanceof Closeable) { 775 try { 776 ((Closeable) visitor).close(); 777 } catch (Throwable t) { 778 ExceptionUtil.rethrowIfInterrupt(t); 779 LOG.debug("Got exception in closing the meta scanner visitor", t); 780 } 781 } 782 } 783 784 /** 785 * @return Get closest metatable region row to passed <code>row</code> 786 */ 787 @NonNull 788 private static RegionInfo getClosestRegionInfo(Connection connection, 789 @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException { 790 byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 791 Scan scan = getMetaScan(connection, 1); 792 scan.setReversed(true); 793 scan.withStartRow(searchRow); 794 try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) { 795 Result result = resultScanner.next(); 796 if (result == null) { 797 throw new TableNotFoundException("Cannot find row in META " + 798 " for table: " + tableName + ", row=" + Bytes.toStringBinary(row)); 799 } 800 RegionInfo regionInfo = getRegionInfo(result); 801 if (regionInfo == null) { 802 throw new IOException("RegionInfo was null or empty in Meta for " + 803 tableName + ", row=" + Bytes.toStringBinary(row)); 804 } 805 return regionInfo; 806 } 807 } 808 809 /** 810 * Returns the column family used for meta columns. 811 * @return HConstants.CATALOG_FAMILY. 812 */ 813 private static byte[] getCatalogFamily() { 814 return HConstants.CATALOG_FAMILY; 815 } 816 817 /** 818 * Returns the column family used for table columns. 819 * @return HConstants.TABLE_FAMILY. 
820 */ 821 private static byte[] getTableFamily() { 822 return HConstants.TABLE_FAMILY; 823 } 824 825 /** 826 * Returns the column qualifier for serialized region info 827 * @return HConstants.REGIONINFO_QUALIFIER 828 */ 829 private static byte[] getRegionInfoColumn() { 830 return HConstants.REGIONINFO_QUALIFIER; 831 } 832 833 /** 834 * Returns the column qualifier for serialized table state 835 * @return HConstants.TABLE_STATE_QUALIFIER 836 */ 837 private static byte[] getTableStateColumn() { 838 return HConstants.TABLE_STATE_QUALIFIER; 839 } 840 841 /** 842 * Returns the column qualifier for serialized region state 843 * @return HConstants.STATE_QUALIFIER 844 */ 845 private static byte[] getRegionStateColumn() { 846 return HConstants.STATE_QUALIFIER; 847 } 848 849 /** 850 * Returns the column qualifier for server column for replicaId 851 * @param replicaId the replicaId of the region 852 * @return a byte[] for server column qualifier 853 */ 854 @VisibleForTesting 855 public static byte[] getServerColumn(int replicaId) { 856 return replicaId == 0 857 ? HConstants.SERVER_QUALIFIER 858 : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 859 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 860 } 861 862 /** 863 * Returns the column qualifier for server start code column for replicaId 864 * @param replicaId the replicaId of the region 865 * @return a byte[] for server start code column qualifier 866 */ 867 @VisibleForTesting 868 public static byte[] getStartCodeColumn(int replicaId) { 869 return replicaId == 0 870 ? 
HConstants.STARTCODE_QUALIFIER 871 : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 872 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 873 } 874 875 /** 876 * Returns the column qualifier for seqNum column for replicaId 877 * @param replicaId the replicaId of the region 878 * @return a byte[] for seqNum column qualifier 879 */ 880 @VisibleForTesting 881 public static byte[] getSeqNumColumn(int replicaId) { 882 return replicaId == 0 883 ? HConstants.SEQNUM_QUALIFIER 884 : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 885 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 886 } 887 888 /** 889 * Parses the replicaId from the server column qualifier. See top of the class javadoc 890 * for the actual meta layout 891 * @param serverColumn the column qualifier 892 * @return an int for the replicaId 893 */ 894 @VisibleForTesting 895 static int parseReplicaIdFromServerColumn(byte[] serverColumn) { 896 String serverStr = Bytes.toString(serverColumn); 897 898 Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr); 899 if (matcher.matches() && matcher.groupCount() > 0) { 900 String group = matcher.group(1); 901 if (group != null && group.length() > 0) { 902 return Integer.parseInt(group.substring(1), 16); 903 } else { 904 return 0; 905 } 906 } 907 return -1; 908 } 909 910 /** 911 * Returns a {@link ServerName} from catalog table {@link Result}. 912 * @param r Result to pull from 913 * @return A ServerName instance or null if necessary fields not found or empty. 
914 */ 915 @Nullable 916 @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only 917 public static ServerName getServerName(final Result r, final int replicaId) { 918 byte[] serverColumn = getServerColumn(replicaId); 919 Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn); 920 if (cell == null || cell.getValueLength() == 0) return null; 921 String hostAndPort = Bytes.toString( 922 cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 923 byte[] startcodeColumn = getStartCodeColumn(replicaId); 924 cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn); 925 if (cell == null || cell.getValueLength() == 0) return null; 926 try { 927 return ServerName.valueOf(hostAndPort, 928 Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 929 } catch (IllegalArgumentException e) { 930 LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e); 931 return null; 932 } 933 } 934 935 /** 936 * The latest seqnum that the server writing to meta observed when opening the region. 937 * E.g. the seqNum when the result of {@link #getServerName(Result, int)} was written. 938 * @param r Result to pull the seqNum from 939 * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written. 940 */ 941 private static long getSeqNumDuringOpen(final Result r, final int replicaId) { 942 Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId)); 943 if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM; 944 return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 945 } 946 947 /** 948 * Returns an HRegionLocationList extracted from the result. 949 * @return an HRegionLocationList containing all locations for the region range or null if 950 * we can't deserialize the result. 
951 */ 952 @Nullable 953 public static RegionLocations getRegionLocations(final Result r) { 954 if (r == null) return null; 955 RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn()); 956 if (regionInfo == null) return null; 957 958 List<HRegionLocation> locations = new ArrayList<>(1); 959 NavigableMap<byte[],NavigableMap<byte[],byte[]>> familyMap = r.getNoVersionMap(); 960 961 locations.add(getRegionLocation(r, regionInfo, 0)); 962 963 NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily()); 964 if (infoMap == null) return new RegionLocations(locations); 965 966 // iterate until all serverName columns are seen 967 int replicaId = 0; 968 byte[] serverColumn = getServerColumn(replicaId); 969 SortedMap<byte[], byte[]> serverMap = null; 970 serverMap = infoMap.tailMap(serverColumn, false); 971 972 if (serverMap.isEmpty()) return new RegionLocations(locations); 973 974 for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) { 975 replicaId = parseReplicaIdFromServerColumn(entry.getKey()); 976 if (replicaId < 0) { 977 break; 978 } 979 HRegionLocation location = getRegionLocation(r, regionInfo, replicaId); 980 // In case the region replica is newly created, it's location might be null. We usually do not 981 // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs. 982 if (location == null || location.getServerName() == null) { 983 locations.add(null); 984 } else { 985 locations.add(location); 986 } 987 } 988 989 return new RegionLocations(locations); 990 } 991 992 /** 993 * Returns the HRegionLocation parsed from the given meta row Result 994 * for the given regionInfo and replicaId. The regionInfo can be the default region info 995 * for the replica. 
996 * @param r the meta row result 997 * @param regionInfo RegionInfo for default replica 998 * @param replicaId the replicaId for the HRegionLocation 999 * @return HRegionLocation parsed from the given meta row Result for the given replicaId 1000 */ 1001 private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo, 1002 final int replicaId) { 1003 ServerName serverName = getServerName(r, replicaId); 1004 long seqNum = getSeqNumDuringOpen(r, replicaId); 1005 RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId); 1006 return new HRegionLocation(replicaInfo, serverName, seqNum); 1007 } 1008 1009 /** 1010 * Returns RegionInfo object from the column 1011 * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog 1012 * table Result. 1013 * @param data a Result object from the catalog table scan 1014 * @return RegionInfo or null 1015 */ 1016 public static RegionInfo getRegionInfo(Result data) { 1017 return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER); 1018 } 1019 1020 /** 1021 * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and 1022 * <code>qualifier</code> of the catalog table result. 1023 * @param r a Result object from the catalog table scan 1024 * @param qualifier Column family qualifier 1025 * @return An RegionInfo instance or null. 1026 */ 1027 @Nullable 1028 private static RegionInfo getRegionInfo(final Result r, byte [] qualifier) { 1029 Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier); 1030 if (cell == null) return null; 1031 return RegionInfo.parseFromOrNull(cell.getValueArray(), 1032 cell.getValueOffset(), cell.getValueLength()); 1033 } 1034 1035 /** 1036 * Returns the daughter regions by reading the corresponding columns of the catalog table 1037 * Result. 
1038 * @param data a Result object from the catalog table scan 1039 * @return a pair of RegionInfo or PairOfSameType(null, null) if the region is not a split 1040 * parent 1041 */ 1042 public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) { 1043 RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER); 1044 RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER); 1045 1046 return new PairOfSameType<>(splitA, splitB); 1047 } 1048 1049 /** 1050 * Returns the merge regions by reading the corresponding columns of the catalog table 1051 * Result. 1052 * @param data a Result object from the catalog table scan 1053 * @return a pair of RegionInfo or PairOfSameType(null, null) if the region is not a split 1054 * parent 1055 */ 1056 public static PairOfSameType<RegionInfo> getMergeRegions(Result data) { 1057 RegionInfo mergeA = getRegionInfo(data, HConstants.MERGEA_QUALIFIER); 1058 RegionInfo mergeB = getRegionInfo(data, HConstants.MERGEB_QUALIFIER); 1059 1060 return new PairOfSameType<>(mergeA, mergeB); 1061 } 1062 1063 /** 1064 * Fetch table state for given table from META table 1065 * @param conn connection to use 1066 * @param tableName table to fetch state for 1067 * @return state 1068 * @throws IOException 1069 */ 1070 @Nullable 1071 public static TableState getTableState(Connection conn, TableName tableName) 1072 throws IOException { 1073 if (tableName.equals(TableName.META_TABLE_NAME)) { 1074 return new TableState(tableName, TableState.State.ENABLED); 1075 } 1076 Table metaHTable = getMetaHTable(conn); 1077 Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn()); 1078 Result result = metaHTable.get(get); 1079 return getTableState(result); 1080 } 1081 1082 /** 1083 * Fetch table states from META table 1084 * @param conn connection to use 1085 * @return map {tableName -> state} 1086 * @throws IOException 1087 */ 1088 public static Map<TableName, TableState> getTableStates(Connection conn) 
1089 throws IOException { 1090 final Map<TableName, TableState> states = new LinkedHashMap<>(); 1091 Visitor collector = new Visitor() { 1092 @Override 1093 public boolean visit(Result r) throws IOException { 1094 TableState state = getTableState(r); 1095 if (state != null) 1096 states.put(state.getTableName(), state); 1097 return true; 1098 } 1099 }; 1100 fullScanTables(conn, collector); 1101 return states; 1102 } 1103 1104 /** 1105 * Updates state in META 1106 * @param conn connection to use 1107 * @param tableName table to look for 1108 * @throws IOException 1109 */ 1110 public static void updateTableState(Connection conn, TableName tableName, 1111 TableState.State actual) throws IOException { 1112 updateTableState(conn, new TableState(tableName, actual)); 1113 } 1114 1115 /** 1116 * Decode table state from META Result. 1117 * Should contain cell from HConstants.TABLE_FAMILY 1118 * @param r result 1119 * @return null if not found 1120 */ 1121 @Nullable 1122 public static TableState getTableState(Result r) throws IOException { 1123 Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn()); 1124 if (cell == null) { 1125 return null; 1126 } 1127 try { 1128 return TableState.parseFrom(TableName.valueOf(r.getRow()), 1129 Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), 1130 cell.getValueOffset() + cell.getValueLength())); 1131 } catch (DeserializationException e) { 1132 throw new IOException(e); 1133 } 1134 } 1135 1136 /** 1137 * Implementations 'visit' a catalog table row. 1138 */ 1139 public interface Visitor { 1140 /** 1141 * Visit the catalog table row. 1142 * @param r A row from catalog table 1143 * @return True if we are to proceed scanning the table, else false if 1144 * we are to stop now. 1145 */ 1146 boolean visit(final Result r) throws IOException; 1147 } 1148 1149 /** 1150 * Implementations 'visit' a catalog table row but with close() at the end. 
1151 */ 1152 public interface CloseableVisitor extends Visitor, Closeable { 1153 } 1154 1155 /** 1156 * A {@link Visitor} that collects content out of passed {@link Result}. 1157 */ 1158 static abstract class CollectingVisitor<T> implements Visitor { 1159 final List<T> results = new ArrayList<>(); 1160 @Override 1161 public boolean visit(Result r) throws IOException { 1162 if (r != null && !r.isEmpty()) { 1163 add(r); 1164 } 1165 return true; 1166 } 1167 1168 abstract void add(Result r); 1169 1170 /** 1171 * @return Collected results; wait till visits complete to collect all 1172 * possible results 1173 */ 1174 List<T> getResults() { 1175 return this.results; 1176 } 1177 } 1178 1179 /** 1180 * Collects all returned. 1181 */ 1182 static class CollectAllVisitor extends CollectingVisitor<Result> { 1183 @Override 1184 void add(Result r) { 1185 this.results.add(r); 1186 } 1187 } 1188 1189 /** 1190 * A Visitor that skips offline regions and split parents 1191 */ 1192 public static abstract class DefaultVisitorBase implements Visitor { 1193 1194 public DefaultVisitorBase() { 1195 super(); 1196 } 1197 1198 public abstract boolean visitInternal(Result rowResult) throws IOException; 1199 1200 @Override 1201 public boolean visit(Result rowResult) throws IOException { 1202 RegionInfo info = getRegionInfo(rowResult); 1203 if (info == null) { 1204 return true; 1205 } 1206 1207 //skip over offline and split regions 1208 if (!(info.isOffline() || info.isSplit())) { 1209 return visitInternal(rowResult); 1210 } 1211 return true; 1212 } 1213 } 1214 1215 /** 1216 * A Visitor for a table. Provides a consistent view of the table's 1217 * hbase:meta entries during concurrent splits (see HBASE-5986 for details). This class 1218 * does not guarantee ordered traversal of meta entries, and can block until the 1219 * hbase:meta entries for daughters are available during splits. 
1220 */ 1221 public static abstract class TableVisitorBase extends DefaultVisitorBase { 1222 private TableName tableName; 1223 1224 public TableVisitorBase(TableName tableName) { 1225 super(); 1226 this.tableName = tableName; 1227 } 1228 1229 @Override 1230 public final boolean visit(Result rowResult) throws IOException { 1231 RegionInfo info = getRegionInfo(rowResult); 1232 if (info == null) { 1233 return true; 1234 } 1235 if (!(info.getTable().equals(tableName))) { 1236 return false; 1237 } 1238 return super.visit(rowResult); 1239 } 1240 } 1241 1242 /** 1243 * Count regions in <code>hbase:meta</code> for passed table. 1244 * @param c Configuration object 1245 * @param tableName table name to count regions for 1246 * @return Count or regions in table <code>tableName</code> 1247 */ 1248 public static int getRegionCount(final Configuration c, final TableName tableName) 1249 throws IOException { 1250 try (Connection connection = ConnectionFactory.createConnection(c)) { 1251 return getRegionCount(connection, tableName); 1252 } 1253 } 1254 1255 /** 1256 * Count regions in <code>hbase:meta</code> for passed table. 1257 * @param connection Connection object 1258 * @param tableName table name to count regions for 1259 * @return Count or regions in table <code>tableName</code> 1260 */ 1261 public static int getRegionCount(final Connection connection, final TableName tableName) 1262 throws IOException { 1263 try (RegionLocator locator = connection.getRegionLocator(tableName)) { 1264 List<HRegionLocation> locations = locator.getAllRegionLocations(); 1265 return locations == null ? 
0 : locations.size(); 1266 } 1267 } 1268 1269 //////////////////////// 1270 // Editing operations // 1271 //////////////////////// 1272 /** 1273 * Generates and returns a Put containing the region into for the catalog table 1274 */ 1275 public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException { 1276 Put put = new Put(regionInfo.getRegionName(), ts); 1277 addRegionInfo(put, regionInfo); 1278 return put; 1279 } 1280 1281 /** 1282 * Generates and returns a Delete containing the region info for the catalog 1283 * table 1284 */ 1285 private static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) { 1286 if (regionInfo == null) { 1287 throw new IllegalArgumentException("Can't make a delete for null region"); 1288 } 1289 Delete delete = new Delete(regionInfo.getRegionName()); 1290 delete.addFamily(getCatalogFamily(), ts); 1291 return delete; 1292 } 1293 1294 /** 1295 * Adds split daughters to the Put 1296 */ 1297 public static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB) 1298 throws IOException { 1299 if (splitA != null) { 1300 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1301 .setRow(put.getRow()) 1302 .setFamily(HConstants.CATALOG_FAMILY) 1303 .setQualifier(HConstants.SPLITA_QUALIFIER) 1304 .setTimestamp(put.getTimestamp()) 1305 .setType(Type.Put) 1306 .setValue(RegionInfo.toByteArray(splitA)) 1307 .build()); 1308 } 1309 if (splitB != null) { 1310 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1311 .setRow(put.getRow()) 1312 .setFamily(HConstants.CATALOG_FAMILY) 1313 .setQualifier(HConstants.SPLITB_QUALIFIER) 1314 .setTimestamp(put.getTimestamp()) 1315 .setType(Type.Put) 1316 .setValue(RegionInfo.toByteArray(splitB)) 1317 .build()); 1318 } 1319 return put; 1320 } 1321 1322 /** 1323 * Put the passed <code>p</code> to the <code>hbase:meta</code> table. 
1324 * @param connection connection we're using 1325 * @param p Put to add to hbase:meta 1326 */ 1327 private static void putToMetaTable(Connection connection, Put p) throws IOException { 1328 try (Table table = getMetaHTable(connection)) { 1329 put(table, p); 1330 } 1331 } 1332 1333 /** 1334 * @param t Table to use 1335 * @param p put to make 1336 */ 1337 private static void put(Table t, Put p) throws IOException { 1338 debugLogMutation(p); 1339 t.put(p); 1340 } 1341 1342 /** 1343 * Put the passed <code>ps</code> to the <code>hbase:meta</code> table. 1344 * @param connection connection we're using 1345 * @param ps Put to add to hbase:meta 1346 */ 1347 public static void putsToMetaTable(final Connection connection, final List<Put> ps) 1348 throws IOException { 1349 if (ps.isEmpty()) { 1350 return; 1351 } 1352 try (Table t = getMetaHTable(connection)) { 1353 debugLogMutations(ps); 1354 // the implementation for putting a single Put is much simpler so here we do a check first. 1355 if (ps.size() == 1) { 1356 t.put(ps.get(0)); 1357 } else { 1358 t.put(ps); 1359 } 1360 } 1361 } 1362 1363 /** 1364 * Delete the passed <code>d</code> from the <code>hbase:meta</code> table. 1365 * @param connection connection we're using 1366 * @param d Delete to add to hbase:meta 1367 */ 1368 private static void deleteFromMetaTable(final Connection connection, final Delete d) 1369 throws IOException { 1370 List<Delete> dels = new ArrayList<>(1); 1371 dels.add(d); 1372 deleteFromMetaTable(connection, dels); 1373 } 1374 1375 /** 1376 * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table. 1377 * @param connection connection we're using 1378 * @param deletes Deletes to add to hbase:meta This list should support #remove. 
1379 */ 1380 private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes) 1381 throws IOException { 1382 try (Table t = getMetaHTable(connection)) { 1383 debugLogMutations(deletes); 1384 t.delete(deletes); 1385 } 1386 } 1387 1388 /** 1389 * Deletes some replica columns corresponding to replicas for the passed rows 1390 * @param metaRows rows in hbase:meta 1391 * @param replicaIndexToDeleteFrom the replica ID we would start deleting from 1392 * @param numReplicasToRemove how many replicas to remove 1393 * @param connection connection we're using to access meta table 1394 */ 1395 public static void removeRegionReplicasFromMeta(Set<byte[]> metaRows, 1396 int replicaIndexToDeleteFrom, int numReplicasToRemove, Connection connection) 1397 throws IOException { 1398 int absoluteIndex = replicaIndexToDeleteFrom + numReplicasToRemove; 1399 for (byte[] row : metaRows) { 1400 long now = EnvironmentEdgeManager.currentTime(); 1401 Delete deleteReplicaLocations = new Delete(row); 1402 for (int i = replicaIndexToDeleteFrom; i < absoluteIndex; i++) { 1403 deleteReplicaLocations.addColumns(getCatalogFamily(), 1404 getServerColumn(i), now); 1405 deleteReplicaLocations.addColumns(getCatalogFamily(), 1406 getSeqNumColumn(i), now); 1407 deleteReplicaLocations.addColumns(getCatalogFamily(), 1408 getStartCodeColumn(i), now); 1409 } 1410 deleteFromMetaTable(connection, deleteReplicaLocations); 1411 } 1412 } 1413 1414 /** 1415 * Execute the passed <code>mutations</code> against <code>hbase:meta</code> table. 
1416 * @param connection connection we're using 1417 * @param mutations Puts and Deletes to execute on hbase:meta 1418 * @throws IOException 1419 */ 1420 public static void mutateMetaTable(final Connection connection, 1421 final List<Mutation> mutations) 1422 throws IOException { 1423 Table t = getMetaHTable(connection); 1424 try { 1425 debugLogMutations(mutations); 1426 t.batch(mutations, null); 1427 } catch (InterruptedException e) { 1428 InterruptedIOException ie = new InterruptedIOException(e.getMessage()); 1429 ie.initCause(e); 1430 throw ie; 1431 } finally { 1432 t.close(); 1433 } 1434 } 1435 1436 private static void addRegionStateToPut(Put put, RegionState.State state) throws IOException { 1437 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1438 .setRow(put.getRow()) 1439 .setFamily(HConstants.CATALOG_FAMILY) 1440 .setQualifier(getRegionStateColumn()) 1441 .setTimestamp(put.getTimestamp()) 1442 .setType(Cell.Type.Put) 1443 .setValue(Bytes.toBytes(state.name())) 1444 .build()); 1445 } 1446 1447 /** 1448 * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not 1449 * add its daughter's as different rows, but adds information about the daughters in the same row 1450 * as the parent. Use 1451 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1452 * if you want to do that. 
1453 * @param connection connection we're using 1454 * @param regionInfo RegionInfo of parent region 1455 * @param splitA first split daughter of the parent regionInfo 1456 * @param splitB second split daughter of the parent regionInfo 1457 * @throws IOException if problem connecting or updating meta 1458 */ 1459 public static void addSplitsToParent(Connection connection, RegionInfo regionInfo, 1460 RegionInfo splitA, RegionInfo splitB) throws IOException { 1461 Table meta = getMetaHTable(connection); 1462 try { 1463 Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime()); 1464 addDaughtersToPut(put, splitA, splitB); 1465 meta.put(put); 1466 debugLogMutation(put); 1467 LOG.debug("Added region {}", regionInfo.getRegionNameAsString()); 1468 } finally { 1469 meta.close(); 1470 } 1471 } 1472 1473 /** 1474 * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this 1475 * does not add its daughter's as different rows, but adds information about the daughters 1476 * in the same row as the parent. Use 1477 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1478 * if you want to do that. 1479 * @param connection connection we're using 1480 * @param regionInfo region information 1481 * @throws IOException if problem connecting or updating meta 1482 */ 1483 @VisibleForTesting 1484 public static void addRegionToMeta(Connection connection, RegionInfo regionInfo) 1485 throws IOException { 1486 addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1); 1487 } 1488 1489 /** 1490 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1491 * is CLOSED. 
1492 * @param connection connection we're using 1493 * @param regionInfos region information list 1494 * @throws IOException if problem connecting or updating meta 1495 */ 1496 public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1497 int regionReplication) throws IOException { 1498 addRegionsToMeta(connection, regionInfos, regionReplication, 1499 EnvironmentEdgeManager.currentTime()); 1500 } 1501 1502 /** 1503 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1504 * is CLOSED. 1505 * @param connection connection we're using 1506 * @param regionInfos region information list 1507 * @param regionReplication 1508 * @param ts desired timestamp 1509 * @throws IOException if problem connecting or updating meta 1510 */ 1511 private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1512 int regionReplication, long ts) throws IOException { 1513 List<Put> puts = new ArrayList<>(); 1514 for (RegionInfo regionInfo : regionInfos) { 1515 if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { 1516 Put put = makePutFromRegionInfo(regionInfo, ts); 1517 // New regions are added with initial state of CLOSED. 1518 addRegionStateToPut(put, RegionState.State.CLOSED); 1519 // Add empty locations for region replicas so that number of replicas can be cached 1520 // whenever the primary region is looked up from meta 1521 for (int i = 1; i < regionReplication; i++) { 1522 addEmptyLocation(put, i); 1523 } 1524 puts.add(put); 1525 } 1526 } 1527 putsToMetaTable(connection, puts); 1528 LOG.info("Added {} regions to meta.", puts.size()); 1529 } 1530 1531 /** 1532 * Merge the two regions into one in an atomic operation. Deletes the two merging regions in 1533 * hbase:meta and adds the merged region with the information of two merging regions. 
1534 * @param connection connection we're using 1535 * @param mergedRegion the merged region 1536 * @param regionA merge parent region A 1537 * @param regionAOpenSeqNum the next open sequence id for region A, used by serial replication. -1 1538 * if not necessary. 1539 * @param regionB merge parent region B 1540 * @param regionBOpenSeqNum the next open sequence id for region B, used by serial replication. -1 1541 * if not necessary. 1542 * @param sn the location of the region 1543 */ 1544 public static void mergeRegions(Connection connection, RegionInfo mergedRegion, 1545 RegionInfo regionA, long regionAOpenSeqNum, RegionInfo regionB, long regionBOpenSeqNum, 1546 ServerName sn, int regionReplication) throws IOException { 1547 try (Table meta = getMetaHTable(connection)) { 1548 long time = EnvironmentEdgeManager.currentTime(); 1549 List<Mutation> mutations = new ArrayList<>(); 1550 1551 List<RegionInfo> replicationParents = new ArrayList<>(2); 1552 // Deletes for merging regions 1553 mutations.add(makeDeleteFromRegionInfo(regionA, time)); 1554 if (regionAOpenSeqNum > 0) { 1555 mutations.add(makePutForReplicationBarrier(regionA, regionAOpenSeqNum, time)); 1556 replicationParents.add(regionA); 1557 } 1558 mutations.add(makeDeleteFromRegionInfo(regionB, time)); 1559 if (regionBOpenSeqNum > 0) { 1560 mutations.add(makePutForReplicationBarrier(regionB, regionBOpenSeqNum, time)); 1561 replicationParents.add(regionB); 1562 } 1563 1564 // Put for parent 1565 Put putOfMerged = makePutFromRegionInfo(mergedRegion, time); 1566 putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1567 .setRow(putOfMerged.getRow()) 1568 .setFamily(HConstants.CATALOG_FAMILY) 1569 .setQualifier(HConstants.MERGEA_QUALIFIER) 1570 .setTimestamp(putOfMerged.getTimestamp()) 1571 .setType(Type.Put) 1572 .setValue(RegionInfo.toByteArray(regionA)) 1573 .build()) 1574 .add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1575 .setRow(putOfMerged.getRow()) 1576 
.setFamily(HConstants.CATALOG_FAMILY) 1577 .setQualifier(HConstants.MERGEB_QUALIFIER) 1578 .setTimestamp(putOfMerged.getTimestamp()) 1579 .setType(Type.Put) 1580 .setValue(RegionInfo.toByteArray(regionB)) 1581 .build()); 1582 // Set initial state to CLOSED 1583 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 1584 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1585 // master tries to assign this offline region. This is followed by re-assignments of the 1586 // merged region from resumed {@link MergeTableRegionsProcedure} 1587 addRegionStateToPut(putOfMerged, RegionState.State.CLOSED); 1588 mutations.add(putOfMerged); 1589 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 1590 // if crash after merge happened but before we got to here.. means in-memory 1591 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 1592 // assign the merged region later. 1593 if (sn != null) { 1594 addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); 1595 } 1596 1597 // Add empty locations for region replicas of the merged region so that number of replicas can 1598 // be cached whenever the primary region is looked up from meta 1599 for (int i = 1; i < regionReplication; i++) { 1600 addEmptyLocation(putOfMerged, i); 1601 } 1602 // add parent reference for serial replication 1603 if (!replicationParents.isEmpty()) { 1604 addReplicationParent(putOfMerged, replicationParents); 1605 } 1606 byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER); 1607 multiMutate(connection, meta, tableRow, mutations); 1608 } 1609 } 1610 1611 /** 1612 * Splits the region into two in an atomic operation. Offlines the parent region with the 1613 * information that it is split into two, and also adds the daughter regions. Does not add the 1614 * location information to the daughter regions since they are not open yet. 
1615 * @param connection connection we're using 1616 * @param parent the parent region which is split 1617 * @param parentOpenSeqNum the next open sequence id for parent region, used by serial 1618 * replication. -1 if not necessary. 1619 * @param splitA Split daughter region A 1620 * @param splitB Split daughter region B 1621 * @param sn the location of the region 1622 */ 1623 public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum, 1624 RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) 1625 throws IOException { 1626 try (Table meta = getMetaHTable(connection)) { 1627 long time = EnvironmentEdgeManager.currentTime(); 1628 // Put for parent 1629 Put putParent = makePutFromRegionInfo(RegionInfoBuilder.newBuilder(parent) 1630 .setOffline(true) 1631 .setSplit(true).build(), time); 1632 addDaughtersToPut(putParent, splitA, splitB); 1633 1634 // Puts for daughters 1635 Put putA = makePutFromRegionInfo(splitA, time); 1636 Put putB = makePutFromRegionInfo(splitB, time); 1637 if (parentOpenSeqNum > 0) { 1638 addReplicationBarrier(putParent, parentOpenSeqNum); 1639 addReplicationParent(putA, Collections.singletonList(parent)); 1640 addReplicationParent(putB, Collections.singletonList(parent)); 1641 } 1642 // Set initial state to CLOSED 1643 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 1644 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1645 // master tries to assign these offline regions. This is followed by re-assignments of the 1646 // daughter regions from resumed {@link SplitTableRegionProcedure} 1647 addRegionStateToPut(putA, RegionState.State.CLOSED); 1648 addRegionStateToPut(putB, RegionState.State.CLOSED); 1649 1650 addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine. 
1651 addSequenceNum(putB, 1, splitB.getReplicaId()); 1652 1653 // Add empty locations for region replicas of daughters so that number of replicas can be 1654 // cached whenever the primary region is looked up from meta 1655 for (int i = 1; i < regionReplication; i++) { 1656 addEmptyLocation(putA, i); 1657 addEmptyLocation(putB, i); 1658 } 1659 1660 byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); 1661 multiMutate(connection, meta, tableRow, putParent, putA, putB); 1662 } 1663 } 1664 1665 /** 1666 * Update state of the table in meta. 1667 * @param connection what we use for update 1668 * @param state new state 1669 */ 1670 private static void updateTableState(Connection connection, TableState state) throws IOException { 1671 Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime()); 1672 putToMetaTable(connection, put); 1673 LOG.info("Updated {} in hbase:meta", state); 1674 } 1675 1676 /** 1677 * Construct PUT for given state 1678 * @param state new state 1679 */ 1680 public static Put makePutFromTableState(TableState state, long ts) { 1681 Put put = new Put(state.getTableName().getName(), ts); 1682 put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray()); 1683 return put; 1684 } 1685 1686 /** 1687 * Remove state for table from meta 1688 * @param connection to use for deletion 1689 * @param table to delete state for 1690 */ 1691 public static void deleteTableState(Connection connection, TableName table) 1692 throws IOException { 1693 long time = EnvironmentEdgeManager.currentTime(); 1694 Delete delete = new Delete(table.getName()); 1695 delete.addColumns(getTableFamily(), getTableStateColumn(), time); 1696 deleteFromMetaTable(connection, delete); 1697 LOG.info("Deleted table " + table + " state from META"); 1698 } 1699 1700 private static void multiMutate(Connection connection, Table table, byte[] row, 1701 Mutation... 
mutations) throws IOException { 1702 multiMutate(connection, table, row, Arrays.asList(mutations)); 1703 } 1704 1705 /** 1706 * Performs an atomic multi-mutate operation against the given table. 1707 */ 1708 private static void multiMutate(Connection connection, final Table table, byte[] row, 1709 final List<Mutation> mutations) throws IOException { 1710 debugLogMutations(mutations); 1711 // TODO: Need rollback!!!! 1712 // TODO: Need Retry!!! 1713 // TODO: What for a timeout? Default write timeout? GET FROM HTABLE? 1714 // TODO: Review when we come through with ProcedureV2. 1715 RegionServerCallable<MutateRowsResponse, 1716 MultiRowMutationProtos.MultiRowMutationService.BlockingInterface> callable = 1717 new RegionServerCallable<MutateRowsResponse, 1718 MultiRowMutationProtos.MultiRowMutationService.BlockingInterface>( 1719 connection, table.getName(), row, null/*RpcController not used in this CPEP!*/) { 1720 @Override 1721 protected MutateRowsResponse rpcCall() throws Exception { 1722 final MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 1723 for (Mutation mutation : mutations) { 1724 if (mutation instanceof Put) { 1725 builder.addMutationRequest(ProtobufUtil.toMutation( 1726 ClientProtos.MutationProto.MutationType.PUT, mutation)); 1727 } else if (mutation instanceof Delete) { 1728 builder.addMutationRequest(ProtobufUtil.toMutation( 1729 ClientProtos.MutationProto.MutationType.DELETE, mutation)); 1730 } else { 1731 throw new DoNotRetryIOException("multi in MetaEditor doesn't support " 1732 + mutation.getClass().getName()); 1733 } 1734 } 1735 // The call to #prepare that ran before this invocation will have populated HRegionLocation. 1736 HRegionLocation hrl = getLocation(); 1737 RegionSpecifier region = ProtobufUtil.buildRegionSpecifier( 1738 RegionSpecifierType.REGION_NAME, hrl.getRegion().getRegionName()); 1739 builder.setRegion(region); 1740 // The rpcController here is awkward. 
The Coprocessor Endpoint wants an instance of a 1741 // com.google.protobuf but we are going over an rpc that is all shaded protobuf so it 1742 // wants a org.apache.h.h.shaded.com.google.protobuf.RpcController. Set up a factory 1743 // that makes com.google.protobuf.RpcController and then copy into it configs. 1744 return getStub().mutateRows(null, builder.build()); 1745 } 1746 1747 @Override 1748 // Called on the end of the super.prepare call. Set the stub. 1749 protected void setStubByServiceName(ServerName serviceName/*Ignored*/) throws IOException { 1750 CoprocessorRpcChannel channel = table.coprocessorService(getRow()); 1751 setStub(MultiRowMutationProtos.MultiRowMutationService.newBlockingStub(channel)); 1752 } 1753 }; 1754 int writeTimeout = connection.getConfiguration().getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, 1755 connection.getConfiguration().getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 1756 HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); 1757 // The region location should be cached in connection. Call prepare so this callable picks 1758 // up the region location (see super.prepare method). 1759 callable.prepare(false); 1760 callable.call(writeTimeout); 1761 } 1762 1763 /** 1764 * Updates the location of the specified region in hbase:meta to be the specified server hostname 1765 * and startcode. 1766 * <p> 1767 * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes 1768 * edits to that region. 
1769 * @param connection connection we're using 1770 * @param regionInfo region to update location of 1771 * @param openSeqNum the latest sequence number obtained when the region was open 1772 * @param sn Server name 1773 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1774 */ 1775 @VisibleForTesting 1776 public static void updateRegionLocation(Connection connection, RegionInfo regionInfo, 1777 ServerName sn, long openSeqNum, long masterSystemTime) throws IOException { 1778 updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime); 1779 } 1780 1781 /** 1782 * Updates the location of the specified region to be the specified server. 1783 * <p> 1784 * Connects to the specified server which should be hosting the specified catalog region name to 1785 * perform the edit. 1786 * @param connection connection we're using 1787 * @param regionInfo region to update location of 1788 * @param sn Server name 1789 * @param openSeqNum the latest sequence number obtained when the region was open 1790 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1791 * @throws IOException In particular could throw {@link java.net.ConnectException} if the server 1792 * is down on other end. 1793 */ 1794 private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn, 1795 long openSeqNum, long masterSystemTime) throws IOException { 1796 // region replicas are kept in the primary region's row 1797 Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime); 1798 addRegionInfo(put, regionInfo); 1799 addLocation(put, sn, openSeqNum, regionInfo.getReplicaId()); 1800 putToMetaTable(connection, put); 1801 LOG.info("Updated row {} with server=", regionInfo.getRegionNameAsString(), sn); 1802 } 1803 1804 /** 1805 * Deletes the specified region from META. 
1806 * @param connection connection we're using 1807 * @param regionInfo region to be deleted from META 1808 * @throws IOException 1809 */ 1810 public static void deleteRegion(Connection connection, RegionInfo regionInfo) throws IOException { 1811 long time = EnvironmentEdgeManager.currentTime(); 1812 Delete delete = new Delete(regionInfo.getRegionName()); 1813 delete.addFamily(getCatalogFamily(), time); 1814 deleteFromMetaTable(connection, delete); 1815 LOG.info("Deleted " + regionInfo.getRegionNameAsString()); 1816 } 1817 1818 /** 1819 * Deletes the specified regions from META. 1820 * @param connection connection we're using 1821 * @param regionsInfo list of regions to be deleted from META 1822 */ 1823 public static void deleteRegions(Connection connection, List<RegionInfo> regionsInfo) 1824 throws IOException { 1825 deleteRegions(connection, regionsInfo, EnvironmentEdgeManager.currentTime()); 1826 } 1827 /** 1828 * Deletes the specified regions from META. 1829 * @param connection connection we're using 1830 * @param regionsInfo list of regions to be deleted from META 1831 */ 1832 public static void deleteRegions(Connection connection, List<RegionInfo> regionsInfo, long ts) 1833 throws IOException { 1834 List<Delete> deletes = new ArrayList<>(regionsInfo.size()); 1835 for (RegionInfo hri : regionsInfo) { 1836 Delete e = new Delete(hri.getRegionName()); 1837 e.addFamily(getCatalogFamily(), ts); 1838 deletes.add(e); 1839 } 1840 deleteFromMetaTable(connection, deletes); 1841 LOG.info("Deleted {} regions from META", regionsInfo.size()); 1842 LOG.debug("Deleted regions: {}", regionsInfo); 1843 } 1844 1845 /** 1846 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 1847 * adds new ones. Regions added back have state CLOSED. 
1848 * @param connection connection we're using 1849 * @param regionInfos list of regions to be added to META 1850 */ 1851 public static void overwriteRegions(Connection connection, 1852 List<RegionInfo> regionInfos, int regionReplication) throws IOException { 1853 // use master time for delete marker and the Put 1854 long now = EnvironmentEdgeManager.currentTime(); 1855 deleteRegions(connection, regionInfos, now); 1856 // Why sleep? This is the easiest way to ensure that the previous deletes does not 1857 // eclipse the following puts, that might happen in the same ts from the server. 1858 // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, 1859 // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. 1860 // 1861 // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed 1862 addRegionsToMeta(connection, regionInfos, regionReplication, now + 1); 1863 LOG.info("Overwritten " + regionInfos.size() + " regions to Meta"); 1864 LOG.debug("Overwritten regions: {} ", regionInfos); 1865 } 1866 1867 /** 1868 * Deletes merge qualifiers for the specified merged region. 
1869 * @param connection connection we're using 1870 * @param mergedRegion the merged region 1871 */ 1872 public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergedRegion) 1873 throws IOException { 1874 long time = EnvironmentEdgeManager.currentTime(); 1875 Delete delete = new Delete(mergedRegion.getRegionName()); 1876 delete.addColumns(getCatalogFamily(), HConstants.MERGEA_QUALIFIER, time); 1877 delete.addColumns(getCatalogFamily(), HConstants.MERGEB_QUALIFIER, time); 1878 deleteFromMetaTable(connection, delete); 1879 LOG.info("Deleted references in merged region " 1880 + mergedRegion.getRegionNameAsString() + ", qualifier=" 1881 + Bytes.toStringBinary(HConstants.MERGEA_QUALIFIER) + " and qualifier=" 1882 + Bytes.toStringBinary(HConstants.MERGEB_QUALIFIER)); 1883 } 1884 1885 public static Put addRegionInfo(final Put p, final RegionInfo hri) 1886 throws IOException { 1887 p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1888 .setRow(p.getRow()) 1889 .setFamily(getCatalogFamily()) 1890 .setQualifier(HConstants.REGIONINFO_QUALIFIER) 1891 .setTimestamp(p.getTimestamp()) 1892 .setType(Type.Put) 1893 .setValue(RegionInfo.toByteArray(hri)) 1894 .build()); 1895 return p; 1896 } 1897 1898 public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId) 1899 throws IOException { 1900 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1901 return p.add(builder.clear() 1902 .setRow(p.getRow()) 1903 .setFamily(getCatalogFamily()) 1904 .setQualifier(getServerColumn(replicaId)) 1905 .setTimestamp(p.getTimestamp()) 1906 .setType(Cell.Type.Put) 1907 .setValue(Bytes.toBytes(sn.getAddress().toString())) 1908 .build()) 1909 .add(builder.clear() 1910 .setRow(p.getRow()) 1911 .setFamily(getCatalogFamily()) 1912 .setQualifier(getStartCodeColumn(replicaId)) 1913 .setTimestamp(p.getTimestamp()) 1914 .setType(Cell.Type.Put) 1915 .setValue(Bytes.toBytes(sn.getStartcode())) 1916 .build()) 1917 
.add(builder.clear() 1918 .setRow(p.getRow()) 1919 .setFamily(getCatalogFamily()) 1920 .setQualifier(getSeqNumColumn(replicaId)) 1921 .setTimestamp(p.getTimestamp()) 1922 .setType(Type.Put) 1923 .setValue(Bytes.toBytes(openSeqNum)) 1924 .build()); 1925 } 1926 1927 private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) { 1928 for (byte b : regionName) { 1929 if (b == ESCAPE_BYTE) { 1930 out.write(ESCAPE_BYTE); 1931 } 1932 out.write(b); 1933 } 1934 } 1935 1936 @VisibleForTesting 1937 public static byte[] getParentsBytes(List<RegionInfo> parents) { 1938 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1939 Iterator<RegionInfo> iter = parents.iterator(); 1940 writeRegionName(bos, iter.next().getRegionName()); 1941 while (iter.hasNext()) { 1942 bos.write(ESCAPE_BYTE); 1943 bos.write(SEPARATED_BYTE); 1944 writeRegionName(bos, iter.next().getRegionName()); 1945 } 1946 return bos.toByteArray(); 1947 } 1948 1949 private static List<byte[]> parseParentsBytes(byte[] bytes) { 1950 List<byte[]> parents = new ArrayList<>(); 1951 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1952 for (int i = 0; i < bytes.length; i++) { 1953 if (bytes[i] == ESCAPE_BYTE) { 1954 i++; 1955 if (bytes[i] == SEPARATED_BYTE) { 1956 parents.add(bos.toByteArray()); 1957 bos.reset(); 1958 continue; 1959 } 1960 // fall through to append the byte 1961 } 1962 bos.write(bytes[i]); 1963 } 1964 if (bos.size() > 0) { 1965 parents.add(bos.toByteArray()); 1966 } 1967 return parents; 1968 } 1969 1970 private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException { 1971 byte[] value = getParentsBytes(parents); 1972 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1973 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER) 1974 .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(value).build()); 1975 } 1976 1977 public static Put 
makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts) 1978 throws IOException { 1979 Put put = new Put(regionInfo.getRegionName(), ts); 1980 addReplicationBarrier(put, openSeqNum); 1981 return put; 1982 } 1983 1984 public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException { 1985 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1986 .setRow(put.getRow()) 1987 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY) 1988 .setQualifier(HConstants.SEQNUM_QUALIFIER) 1989 .setTimestamp(put.getTimestamp()) 1990 .setType(Type.Put) 1991 .setValue(Bytes.toBytes(openSeqNum)) 1992 .build()); 1993 } 1994 1995 private static Put addEmptyLocation(Put p, int replicaId) throws IOException { 1996 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1997 return p.add(builder.clear() 1998 .setRow(p.getRow()) 1999 .setFamily(getCatalogFamily()) 2000 .setQualifier(getServerColumn(replicaId)) 2001 .setTimestamp(p.getTimestamp()) 2002 .setType(Type.Put) 2003 .build()) 2004 .add(builder.clear() 2005 .setRow(p.getRow()) 2006 .setFamily(getCatalogFamily()) 2007 .setQualifier(getStartCodeColumn(replicaId)) 2008 .setTimestamp(p.getTimestamp()) 2009 .setType(Cell.Type.Put) 2010 .build()) 2011 .add(builder.clear() 2012 .setRow(p.getRow()) 2013 .setFamily(getCatalogFamily()) 2014 .setQualifier(getSeqNumColumn(replicaId)) 2015 .setTimestamp(p.getTimestamp()) 2016 .setType(Cell.Type.Put) 2017 .build()); 2018 } 2019 2020 public static final class ReplicationBarrierResult { 2021 private final long[] barriers; 2022 private final RegionState.State state; 2023 private final List<byte[]> parentRegionNames; 2024 2025 public ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) { 2026 this.barriers = barriers; 2027 this.state = state; 2028 this.parentRegionNames = parentRegionNames; 2029 } 2030 2031 public long[] getBarriers() { 2032 return barriers; 2033 } 2034 2035 public 
RegionState.State getState() { 2036 return state; 2037 } 2038 2039 public List<byte[]> getParentRegionNames() { 2040 return parentRegionNames; 2041 } 2042 2043 @Override 2044 public String toString() { 2045 return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + 2046 state + ", parentRegionNames=" + 2047 parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) + 2048 "]"; 2049 } 2050 } 2051 2052 private static long getReplicationBarrier(Cell c) { 2053 return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength()); 2054 } 2055 2056 public static long[] getReplicationBarriers(Result result) { 2057 return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2058 .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray(); 2059 } 2060 2061 private static ReplicationBarrierResult getReplicationBarrierResult(Result result) { 2062 long[] barriers = getReplicationBarriers(result); 2063 byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn()); 2064 RegionState.State state = 2065 stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null; 2066 byte[] parentRegionsBytes = 2067 result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER); 2068 List<byte[]> parentRegionNames = 2069 parentRegionsBytes != null ? 
parseParentsBytes(parentRegionsBytes) : Collections.emptyList(); 2070 return new ReplicationBarrierResult(barriers, state, parentRegionNames); 2071 } 2072 2073 public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn, 2074 TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException { 2075 byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 2076 byte[] metaStopKey = 2077 RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false); 2078 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey) 2079 .addColumn(getCatalogFamily(), getRegionStateColumn()) 2080 .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true) 2081 .setCaching(10); 2082 try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) { 2083 for (Result result;;) { 2084 result = scanner.next(); 2085 if (result == null) { 2086 return new ReplicationBarrierResult(new long[0], null, Collections.emptyList()); 2087 } 2088 byte[] regionName = result.getRow(); 2089 // TODO: we may look up a region which has already been split or merged so we need to check 2090 // whether the encoded name matches. Need to find a way to quit earlier when there is no 2091 // record for the given region, for now it will scan to the end of the table. 
2092 if (!Bytes.equals(encodedRegionName, 2093 Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))) { 2094 continue; 2095 } 2096 return getReplicationBarrierResult(result); 2097 } 2098 } 2099 } 2100 2101 public static long[] getReplicationBarrier(Connection conn, byte[] regionName) 2102 throws IOException { 2103 try (Table table = getMetaHTable(conn)) { 2104 Result result = table.get(new Get(regionName) 2105 .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2106 .readAllVersions()); 2107 return getReplicationBarriers(result); 2108 } 2109 } 2110 2111 public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn, 2112 TableName tableName) throws IOException { 2113 List<Pair<String, Long>> list = new ArrayList<>(); 2114 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2115 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> { 2116 byte[] value = 2117 r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER); 2118 if (value == null) { 2119 return true; 2120 } 2121 long lastBarrier = Bytes.toLong(value); 2122 String encodedRegionName = RegionInfo.encodeRegionName(r.getRow()); 2123 list.add(Pair.newPair(encodedRegionName, lastBarrier)); 2124 return true; 2125 }); 2126 return list; 2127 } 2128 2129 public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn, 2130 TableName tableName) throws IOException { 2131 List<String> list = new ArrayList<>(); 2132 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2133 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, 2134 new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> { 2135 list.add(RegionInfo.encodeRegionName(r.getRow())); 2136 return true; 2137 }); 2138 return list; 2139 } 2140 2141 private static void debugLogMutations(List<? 
extends Mutation> mutations) throws IOException { 2142 if (!METALOG.isDebugEnabled()) { 2143 return; 2144 } 2145 // Logging each mutation in separate line makes it easier to see diff between them visually 2146 // because of common starting indentation. 2147 for (Mutation mutation : mutations) { 2148 debugLogMutation(mutation); 2149 } 2150 } 2151 2152 private static void debugLogMutation(Mutation p) throws IOException { 2153 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 2154 } 2155 2156 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 2157 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 2158 .setRow(p.getRow()) 2159 .setFamily(HConstants.CATALOG_FAMILY) 2160 .setQualifier(getSeqNumColumn(replicaId)) 2161 .setTimestamp(p.getTimestamp()) 2162 .setType(Type.Put) 2163 .setValue(Bytes.toBytes(openSeqNum)) 2164 .build()); 2165 } 2166}