001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.wal; 020 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.List; 024import java.util.Map; 025import java.util.Set; 026import java.util.TreeSet; 027 028import org.apache.hadoop.hbase.Cell; 029import org.apache.hadoop.hbase.CellUtil; 030import org.apache.hadoop.hbase.HBaseInterfaceAudience; 031import org.apache.hadoop.hbase.PrivateCellUtil; 032import org.apache.hadoop.hbase.KeyValue; 033import org.apache.hadoop.hbase.client.RegionInfo; 034import org.apache.hadoop.hbase.codec.Codec; 035import org.apache.hadoop.hbase.io.HeapSize; 036import org.apache.hadoop.hbase.util.Bytes; 037import org.apache.hadoop.hbase.util.ClassSize; 038import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 039import org.apache.yetus.audience.InterfaceAudience; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos; 044import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor; 045import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.FlushDescriptor; 046import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDescriptor; 047 048 049/** 050 * Used in HBase's transaction log (WAL) to represent a collection of edits (Cell/KeyValue objects) 051 * that came in as a single transaction. All the edits for a given transaction are written out as a 052 * single record, in PB format, followed (optionally) by Cells written via the WALCellEncoder. 053 * <p>This class is LimitedPrivate for CPs to read-only. The {@link #add} methods are 054 * classified as private methods, not for use by CPs.</p> 055 * <p>WALEdit will accumulate a Set of all column family names referenced by the Cells 056 * {@link #add(Cell)}'d. This is an optimization. Usually when loading a WALEdit, we have the 057 * column family name to-hand.. just shove it into the WALEdit if available. Doing this, we can 058 * save on a parse of each Cell to figure column family down the line when we go to add the 059 * WALEdit to the WAL file. See the hand-off in FSWALEntry Constructor. 060 */ 061// TODO: Do not expose this class to Coprocessors. It has set methods. A CP might meddle. 062@InterfaceAudience.LimitedPrivate({ HBaseInterfaceAudience.REPLICATION, 063 HBaseInterfaceAudience.COPROC }) 064public class WALEdit implements HeapSize { 065 private static final Logger LOG = LoggerFactory.getLogger(WALEdit.class); 066 067 // TODO: Get rid of this; see HBASE-8457 068 public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY"); 069 @VisibleForTesting 070 public static final byte [] METAROW = Bytes.toBytes("METAROW"); 071 @VisibleForTesting 072 public static final byte[] COMPACTION = Bytes.toBytes("HBASE::COMPACTION"); 073 @VisibleForTesting 074 public static final byte [] FLUSH = Bytes.toBytes("HBASE::FLUSH"); 075 @VisibleForTesting 076 public static final byte [] REGION_EVENT = Bytes.toBytes("HBASE::REGION_EVENT"); 077 @VisibleForTesting 078 public static final byte [] BULK_LOAD = Bytes.toBytes("HBASE::BULK_LOAD"); 079 080 private final boolean replay; 081 082 private ArrayList<Cell> cells = null; 083 084 /** 085 * All the Cell families in <code>cells</code>. Updated by {@link #add(Cell)} and 086 * {@link #add(Map)}. This Set is passed to the FSWALEntry so it does not have 087 * to recalculate the Set of families in a transaction; makes for a bunch of CPU savings. 088 * An optimization that saves on CPU-expensive Cell-parsing. 089 */ 090 private Set<byte []> families = null; 091 092 public WALEdit() { 093 this(false); 094 } 095 096 /** 097 * @deprecated Since 2.0.1. Use {@link #WALEdit(int, boolean)} instead. 098 */ 099 @Deprecated 100 public WALEdit(boolean isReplay) { 101 this(1, isReplay); 102 } 103 104 /** 105 * @deprecated Since 2.0.1. Use {@link #WALEdit(int, boolean)} instead. 106 */ 107 @Deprecated 108 public WALEdit(int cellCount) { 109 this(cellCount, false); 110 } 111 112 /** 113 * @param cellCount Pass so can pre-size the WALEdit. Optimization. 114 */ 115 public WALEdit(int cellCount, boolean isReplay) { 116 this.replay = isReplay; 117 cells = new ArrayList<>(cellCount); 118 } 119 120 private Set<byte[]> getOrCreateFamilies() { 121 if (this.families == null) { 122 this.families = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR); 123 } 124 return this.families; 125 } 126 127 /** 128 * For use by FSWALEntry ONLY. An optimization. 129 * @return All families in {@link #getCells()}; may be null. 130 */ 131 public Set<byte []> getFamilies() { 132 return this.families; 133 } 134 135 /** 136 * @return True is <code>f</code> is {@link #METAFAMILY} 137 */ 138 public static boolean isMetaEditFamily(final byte [] f) { 139 return Bytes.equals(METAFAMILY, f); 140 } 141 142 public static boolean isMetaEditFamily(Cell cell) { 143 return CellUtil.matchingFamily(cell, METAFAMILY); 144 } 145 146 public boolean isMetaEdit() { 147 for (Cell cell: cells) { 148 if (!isMetaEditFamily(cell)) { 149 return false; 150 } 151 } 152 return true; 153 } 154 155 /** 156 * @return True when current WALEdit is created by log replay. Replication skips WALEdits from 157 * replay. 158 */ 159 public boolean isReplay() { 160 return this.replay; 161 } 162 163 @InterfaceAudience.Private 164 public WALEdit add(Cell cell, byte [] family) { 165 getOrCreateFamilies().add(family); 166 return addCell(cell); 167 } 168 169 @InterfaceAudience.Private 170 public WALEdit add(Cell cell) { 171 // We clone Family each time we add a Cell. Expensive but safe. For CPU savings, use 172 // add(Map) or add(Cell, family). 173 return add(cell, CellUtil.cloneFamily(cell)); 174 } 175 176 public boolean isEmpty() { 177 return cells.isEmpty(); 178 } 179 180 public int size() { 181 return cells.size(); 182 } 183 184 public ArrayList<Cell> getCells() { 185 return cells; 186 } 187 188 /** 189 * This is not thread safe. 190 * This will change the WALEdit and shouldn't be used unless you are sure that nothing 191 * else depends on the contents being immutable. 192 * 193 * @param cells the list of cells that this WALEdit now contains. 194 */ 195 @InterfaceAudience.Private 196 // Used by replay. 197 public void setCells(ArrayList<Cell> cells) { 198 this.cells = cells; 199 this.families = null; 200 } 201 202 /** 203 * Reads WALEdit from cells. 204 * @param cellDecoder Cell decoder. 205 * @param expectedCount Expected cell count. 206 * @return Number of KVs read. 207 */ 208 public int readFromCells(Codec.Decoder cellDecoder, int expectedCount) throws IOException { 209 cells.clear(); 210 cells.ensureCapacity(expectedCount); 211 while (cells.size() < expectedCount && cellDecoder.advance()) { 212 cells.add(cellDecoder.current()); 213 } 214 return cells.size(); 215 } 216 217 @Override 218 public long heapSize() { 219 long ret = ClassSize.ARRAYLIST; 220 for (Cell cell : cells) { 221 ret += PrivateCellUtil.estimatedSizeOfCell(cell); 222 } 223 return ret; 224 } 225 226 public long estimatedSerializedSizeOf() { 227 long ret = 0; 228 for (Cell cell: cells) { 229 ret += PrivateCellUtil.estimatedSerializedSizeOf(cell); 230 } 231 return ret; 232 } 233 234 @Override 235 public String toString() { 236 StringBuilder sb = new StringBuilder(); 237 238 sb.append("[#edits: " + cells.size() + " = <"); 239 for (Cell cell : cells) { 240 sb.append(cell); 241 sb.append("; "); 242 } 243 sb.append(">]"); 244 return sb.toString(); 245 } 246 247 public static WALEdit createFlushWALEdit(RegionInfo hri, FlushDescriptor f) { 248 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, FLUSH, 249 EnvironmentEdgeManager.currentTime(), f.toByteArray()); 250 return new WALEdit().add(kv, METAFAMILY); 251 } 252 253 public static FlushDescriptor getFlushDescriptor(Cell cell) throws IOException { 254 if (CellUtil.matchingColumn(cell, METAFAMILY, FLUSH)) { 255 return FlushDescriptor.parseFrom(CellUtil.cloneValue(cell)); 256 } 257 return null; 258 } 259 260 public static WALEdit createRegionEventWALEdit(RegionInfo hri, 261 RegionEventDescriptor regionEventDesc) { 262 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, REGION_EVENT, 263 EnvironmentEdgeManager.currentTime(), regionEventDesc.toByteArray()); 264 return new WALEdit().add(kv, METAFAMILY); 265 } 266 267 public static RegionEventDescriptor getRegionEventDescriptor(Cell cell) throws IOException { 268 if (CellUtil.matchingColumn(cell, METAFAMILY, REGION_EVENT)) { 269 return RegionEventDescriptor.parseFrom(CellUtil.cloneValue(cell)); 270 } 271 return null; 272 } 273 274 /** 275 * Create a compaction WALEdit 276 * @param c 277 * @return A WALEdit that has <code>c</code> serialized as its value 278 */ 279 public static WALEdit createCompaction(final RegionInfo hri, final CompactionDescriptor c) { 280 byte [] pbbytes = c.toByteArray(); 281 KeyValue kv = new KeyValue(getRowForRegion(hri), METAFAMILY, COMPACTION, 282 EnvironmentEdgeManager.currentTime(), pbbytes); 283 return new WALEdit().add(kv, METAFAMILY); //replication scope null so this won't be replicated 284 } 285 286 public static byte[] getRowForRegion(RegionInfo hri) { 287 byte[] startKey = hri.getStartKey(); 288 if (startKey.length == 0) { 289 // empty row key is not allowed in mutations because it is both the start key and the end key 290 // we return the smallest byte[] that is bigger (in lex comparison) than byte[0]. 291 return new byte[] {0}; 292 } 293 return startKey; 294 } 295 296 /** 297 * Deserialized and returns a CompactionDescriptor is the KeyValue contains one. 298 * @param kv the key value 299 * @return deserialized CompactionDescriptor or null. 300 */ 301 public static CompactionDescriptor getCompaction(Cell kv) throws IOException { 302 if (isCompactionMarker(kv)) { 303 return CompactionDescriptor.parseFrom(CellUtil.cloneValue(kv)); 304 } 305 return null; 306 } 307 308 /** 309 * Returns true if the given cell is a serialized {@link CompactionDescriptor} 310 * 311 * @see #getCompaction(Cell) 312 */ 313 public static boolean isCompactionMarker(Cell cell) { 314 return CellUtil.matchingColumn(cell, METAFAMILY, COMPACTION); 315 } 316 317 /** 318 * Create a bulk loader WALEdit 319 * 320 * @param hri The RegionInfo for the region in which we are bulk loading 321 * @param bulkLoadDescriptor The descriptor for the Bulk Loader 322 * @return The WALEdit for the BulkLoad 323 */ 324 public static WALEdit createBulkLoadEvent(RegionInfo hri, 325 WALProtos.BulkLoadDescriptor bulkLoadDescriptor) { 326 KeyValue kv = new KeyValue(getRowForRegion(hri), 327 METAFAMILY, 328 BULK_LOAD, 329 EnvironmentEdgeManager.currentTime(), 330 bulkLoadDescriptor.toByteArray()); 331 return new WALEdit().add(kv, METAFAMILY); 332 } 333 334 /** 335 * Deserialized and returns a BulkLoadDescriptor from the passed in Cell 336 * @param cell the key value 337 * @return deserialized BulkLoadDescriptor or null. 338 */ 339 public static WALProtos.BulkLoadDescriptor getBulkLoadDescriptor(Cell cell) throws IOException { 340 if (CellUtil.matchingColumn(cell, METAFAMILY, BULK_LOAD)) { 341 return WALProtos.BulkLoadDescriptor.parseFrom(CellUtil.cloneValue(cell)); 342 } 343 return null; 344 } 345 346 /** 347 * Append the given map of family->edits to a WALEdit data structure. 348 * This does not write to the WAL itself. 349 * Note that as an optimization, we will stamp the Set of column families into the WALEdit 350 * to save on our having to calculate it subsequently way down in the actual WAL writing. 351 * 352 * @param familyMap map of family->edits 353 */ 354 public void add(Map<byte[], List<Cell>> familyMap) { 355 for (Map.Entry<byte [], List<Cell>> e: familyMap.entrySet()) { 356 // 'foreach' loop NOT used. See HBASE-12023 "...creates too many iterator objects." 357 int listSize = e.getValue().size(); 358 // Add all Cells first and then at end, add the family rather than call {@link #add(Cell)} 359 // and have it clone family each time. Optimization! 360 for (int i = 0; i < listSize; i++) { 361 addCell(e.getValue().get(i)); 362 } 363 addFamily(e.getKey()); 364 } 365 } 366 367 private void addFamily(byte [] family) { 368 getOrCreateFamilies().add(family); 369 } 370 371 private WALEdit addCell(Cell cell) { 372 this.cells.add(cell); 373 return this; 374 } 375}