001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 * <p>
010 * http://www.apache.org/licenses/LICENSE-2.0
011 * <p>
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.querymatcher;
019
020import java.io.IOException;
021import java.util.HashMap;
022import java.util.Map;
023import java.util.NavigableMap;
024import java.util.NavigableSet;
025import java.util.SortedMap;
026import java.util.SortedSet;
027import java.util.TreeMap;
028import java.util.TreeSet;
029
030import org.apache.hadoop.hbase.Cell;
031import org.apache.hadoop.hbase.CellComparator;
032import org.apache.hadoop.hbase.CellUtil;
033import org.apache.hadoop.hbase.PrivateCellUtil;
034import org.apache.hadoop.hbase.KeyValue.Type;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
037import org.apache.hadoop.hbase.util.Bytes;
038
039/**
040 * A tracker both implementing ColumnTracker and DeleteTracker, used for mvcc-sensitive scanning.
041 * We should make sure in one QueryMatcher the ColumnTracker and DeleteTracker is the same instance.
042 */
043@InterfaceAudience.Private
044public class NewVersionBehaviorTracker implements ColumnTracker, DeleteTracker {
045
046  private byte[] lastCqArray;
047  private int lastCqLength;
048  private int lastCqOffset;
049  private long lastCqTs;
050  private long lastCqMvcc;
051  private byte lastCqType;
052  private int columnIndex;
053  private int countCurrentCol;
054
055  protected int maxVersions;
056  private int resultMaxVersions;
057  private byte[][] columns;
058  private int minVersions;
059  private long oldestStamp;
060  private CellComparator comparator;
061
  // These two maps have the same structure.
  // Each node is a versions deletion (DeleteFamily or DeleteColumn). The key is the mvcc of the
  // marker; the value is a data structure holding the info we need about what happened before
  // this node's mvcc and after the previous node's mvcc. The last node is a special node whose
  // key is max_long; it saves the info after the last deletion. See DeleteVersionsNode's comments
  // for details.
  // The delColMap is constructed and used for each cq, and the delFamMap is constructed when cq
  // is null and saves family-level delete markers. Each time the cq changes, we should
  // reconstruct delColMap as a deep copy of delFamMap.
070  protected NavigableMap<Long, DeleteVersionsNode> delColMap = new TreeMap<>();
071  protected NavigableMap<Long, DeleteVersionsNode> delFamMap = new TreeMap<>();
072
073  /**
074   * Note maxVersion and minVersion must set according to cf's conf, not user's scan parameter.
075   *
076   * @param columns           columns specified user in query
077   * @param comparartor       the cell comparator
078   * @param minVersion        The minimum number of versions to keep(used when TTL is set).
079   * @param maxVersion        The maximum number of versions in CF's conf
080   * @param resultMaxVersions maximum versions to return per column, which may be different from
081   *                          maxVersion
082   * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
083   */
084  public NewVersionBehaviorTracker(NavigableSet<byte[]> columns, CellComparator comparartor,
085      int minVersion, int maxVersion, int resultMaxVersions, long oldestUnexpiredTS) {
086    this.maxVersions = maxVersion;
087    this.minVersions = minVersion;
088    this.resultMaxVersions = resultMaxVersions;
089    this.oldestStamp = oldestUnexpiredTS;
090    if (columns != null && columns.size() > 0) {
091      this.columns = new byte[columns.size()][];
092      int i = 0;
093      for (byte[] column : columns) {
094        this.columns[i++] = column;
095      }
096    }
097    this.comparator = comparartor;
098    reset();
099  }
100
  /**
   * No-op: this tracker performs no work in this callback.
   */
  @Override
  public void beforeShipped() throws IOException {
    // Do nothing
  }
105
106  /**
107   * A data structure which contains infos we need that happens before this node's mvcc and
108   * after the previous node's mvcc. A node means there is a version deletion at the mvcc and ts.
109   */
110  protected class DeleteVersionsNode {
111    public long ts;
112    public long mvcc;
113
114    // <timestamp, set<mvcc>>
115    // Key is ts of version deletes, value is its mvccs.
116    // We may delete more than one time for a version.
117    private Map<Long, SortedSet<Long>> deletesMap = new HashMap<>();
118
119    // <mvcc, set<mvcc>>
120    // Key is mvcc of version deletes, value is mvcc of visible puts before the delete effect.
121    private NavigableMap<Long, SortedSet<Long>> mvccCountingMap = new TreeMap<>();
122
123    protected DeleteVersionsNode(long ts, long mvcc) {
124      this.ts = ts;
125      this.mvcc = mvcc;
126      mvccCountingMap.put(Long.MAX_VALUE, new TreeSet<Long>());
127    }
128
129    protected DeleteVersionsNode() {
130      this(Long.MIN_VALUE, Long.MAX_VALUE);
131    }
132
133    public void addVersionDelete(Cell cell) {
134      SortedSet<Long> set = deletesMap.get(cell.getTimestamp());
135      if (set == null) {
136        set = new TreeSet<>();
137        deletesMap.put(cell.getTimestamp(), set);
138      }
139      set.add(cell.getSequenceId());
140      // The init set should be the puts whose mvcc is smaller than this Delete. Because
141      // there may be some Puts masked by them. The Puts whose mvcc is larger than this Delete can
142      // not be copied to this node because we may delete one version and the oldest put may not be
143      // masked.
144      SortedSet<Long> nextValue = mvccCountingMap.ceilingEntry(cell.getSequenceId()).getValue();
145      SortedSet<Long> thisValue = new TreeSet<>(nextValue.headSet(cell.getSequenceId()));
146      mvccCountingMap.put(cell.getSequenceId(), thisValue);
147    }
148
149    protected DeleteVersionsNode getDeepCopy() {
150      DeleteVersionsNode node = new DeleteVersionsNode(ts, mvcc);
151      for (Map.Entry<Long, SortedSet<Long>> e : deletesMap.entrySet()) {
152        node.deletesMap.put(e.getKey(), new TreeSet<>(e.getValue()));
153      }
154      for (Map.Entry<Long, SortedSet<Long>> e : mvccCountingMap.entrySet()) {
155        node.mvccCountingMap.put(e.getKey(), new TreeSet<>(e.getValue()));
156      }
157      return node;
158    }
159  }
160
161  /**
162   * Reset the map if it is different with the last Cell.
163   * Save the cq array/offset/length for next Cell.
164   *
165   * @return If this put has duplicate ts with last cell, return the mvcc of last cell.
166   * Else return MAX_VALUE.
167   */
168  protected long prepare(Cell cell) {
169    boolean matchCq =
170        PrivateCellUtil.matchingQualifier(cell, lastCqArray, lastCqOffset, lastCqLength);
171    if (!matchCq) {
172      // The last cell is family-level delete and this is not, or the cq is changed,
173      // we should construct delColMap as a deep copy of delFamMap.
174      delColMap.clear();
175      for (Map.Entry<Long, DeleteVersionsNode> e : delFamMap.entrySet()) {
176        delColMap.put(e.getKey(), e.getValue().getDeepCopy());
177      }
178      countCurrentCol = 0;
179    }
180    if (matchCq && !PrivateCellUtil.isDelete(lastCqType) && lastCqType == cell.getTypeByte()
181        && lastCqTs == cell.getTimestamp()) {
182      // Put with duplicate timestamp, ignore.
183      return lastCqMvcc;
184    }
185    lastCqArray = cell.getQualifierArray();
186    lastCqOffset = cell.getQualifierOffset();
187    lastCqLength = cell.getQualifierLength();
188    lastCqTs = cell.getTimestamp();
189    lastCqMvcc = cell.getSequenceId();
190    lastCqType = cell.getTypeByte();
191    return Long.MAX_VALUE;
192  }
193
194  // DeleteTracker
195  @Override
196  public void add(Cell cell) {
197    prepare(cell);
198    byte type = cell.getTypeByte();
199    switch (Type.codeToType(type)) {
200    // By the order of seen. We put null cq at first.
201    case DeleteFamily: // Delete all versions of all columns of the specified family
202      delFamMap.put(cell.getSequenceId(),
203          new DeleteVersionsNode(cell.getTimestamp(), cell.getSequenceId()));
204      break;
205    case DeleteFamilyVersion: // Delete all columns of the specified family and specified version
206      delFamMap.ceilingEntry(cell.getSequenceId()).getValue().addVersionDelete(cell);
207      break;
208
209    // These two kinds of markers are mix with Puts.
210    case DeleteColumn: // Delete all versions of the specified column
211      delColMap.put(cell.getSequenceId(),
212          new DeleteVersionsNode(cell.getTimestamp(), cell.getSequenceId()));
213      break;
214    case Delete: // Delete the specified version of the specified column.
215      delColMap.ceilingEntry(cell.getSequenceId()).getValue().addVersionDelete(cell);
216      break;
217    default:
218      throw new AssertionError("Unknown delete marker type for " + cell);
219    }
220  }
221
222  /**
223   * This method is not idempotent, we will save some info to judge VERSION_MASKED.
224   * @param cell - current cell to check if deleted by a previously seen delete
225   * @return We don't distinguish DeleteColumn and DeleteFamily. We only return code for column.
226   */
227  @Override
228  public DeleteResult isDeleted(Cell cell) {
229    long duplicateMvcc = prepare(cell);
230
231    for (Map.Entry<Long, DeleteVersionsNode> e : delColMap.tailMap(cell.getSequenceId())
232        .entrySet()) {
233      DeleteVersionsNode node = e.getValue();
234      long deleteMvcc = Long.MAX_VALUE;
235      SortedSet<Long> deleteVersionMvccs = node.deletesMap.get(cell.getTimestamp());
236      if (deleteVersionMvccs != null) {
237        SortedSet<Long> tail = deleteVersionMvccs.tailSet(cell.getSequenceId());
238        if (!tail.isEmpty()) {
239          deleteMvcc = tail.first();
240        }
241      }
242      SortedMap<Long, SortedSet<Long>> subMap =
243          node.mvccCountingMap
244              .subMap(cell.getSequenceId(), true, Math.min(duplicateMvcc, deleteMvcc), true);
245      for (Map.Entry<Long, SortedSet<Long>> seg : subMap.entrySet()) {
246        if (seg.getValue().size() >= maxVersions) {
247          return DeleteResult.VERSION_MASKED;
248        }
249        seg.getValue().add(cell.getSequenceId());
250      }
251      if (deleteMvcc < Long.MAX_VALUE) {
252        return DeleteResult.VERSION_DELETED;
253      }
254
255      if (cell.getTimestamp() <= node.ts) {
256        return DeleteResult.COLUMN_DELETED;
257      }
258    }
259    if (duplicateMvcc < Long.MAX_VALUE) {
260      return DeleteResult.VERSION_MASKED;
261    }
262    return DeleteResult.NOT_DELETED;
263  }
264
265  @Override
266  public boolean isEmpty() {
267    return delColMap.size() == 1 && delColMap.get(Long.MAX_VALUE).mvccCountingMap.size() == 1
268        && delFamMap.size() == 1 && delFamMap.get(Long.MAX_VALUE).mvccCountingMap.size() == 1;
269  }
270
  /**
   * No-op: this tracker keeps no state that is changed by this call.
   */
  @Override
  public void update() {
    // ignore
  }
275
276  //ColumnTracker
277
278  @Override
279  public MatchCode checkColumn(Cell cell, byte type) throws IOException {
280    if (columns == null) {
281        return MatchCode.INCLUDE;
282    }
283
284    while (!done()) {
285      int c = CellUtil.compareQualifiers(cell,
286        columns[columnIndex], 0, columns[columnIndex].length);
287      if (c < 0) {
288        return MatchCode.SEEK_NEXT_COL;
289      }
290
291      if (c == 0) {
292        // We drop old version in #isDeleted, so here we must return INCLUDE.
293        return MatchCode.INCLUDE;
294      }
295
296      columnIndex++;
297    }
298    // No more columns left, we are done with this query
299    return MatchCode.SEEK_NEXT_ROW;
300  }
301
302  @Override
303  public MatchCode checkVersions(Cell cell, long timestamp, byte type,
304      boolean ignoreCount) throws IOException {
305    assert !PrivateCellUtil.isDelete(type);
306    // We drop old version in #isDeleted, so here we won't SKIP because of versioning. But we should
307    // consider TTL.
308    if (ignoreCount) {
309      return MatchCode.INCLUDE;
310    }
311    countCurrentCol++;
312    if (timestamp < this.oldestStamp) {
313      if (countCurrentCol == minVersions) {
314        return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
315      }
316      if (countCurrentCol > minVersions) {
317        // This may not be reached, only for safety.
318        return MatchCode.SEEK_NEXT_COL;
319      }
320    }
321
322    if (countCurrentCol == resultMaxVersions) {
323      // We have enough number of versions for user's requirement.
324      return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
325    }
326    if (countCurrentCol > resultMaxVersions) {
327      // This may not be reached, only for safety
328      return MatchCode.SEEK_NEXT_COL;
329    }
330    return MatchCode.INCLUDE;
331  }
332
333  @Override
334  public void reset() {
335    delColMap.clear();
336    delFamMap.clear();
337    lastCqArray = null;
338    lastCqLength = 0;
339    lastCqOffset = 0;
340    lastCqTs = Long.MIN_VALUE;
341    lastCqMvcc = 0;
342    lastCqType = 0;
343    columnIndex = 0;
344    countCurrentCol = 0;
345    resetInternal();
346  }
347
348  protected void resetInternal(){
349    delFamMap.put(Long.MAX_VALUE, new DeleteVersionsNode());
350  }
351
352  @Override
353  public boolean done() {
354    return columns != null && columnIndex >= columns.length;
355  }
356
357  @Override
358  public ColumnCount getColumnHint() {
359    if (columns != null) {
360      if (columnIndex < columns.length) {
361        return new ColumnCount(columns[columnIndex]);
362      }
363    }
364    return null;
365  }
366
  /**
   * Always answers SEEK_NEXT_COL; the cell is not inspected.
   *
   * @param cell not consulted by this implementation
   * @return MatchCode.SEEK_NEXT_COL
   */
  @Override
  public MatchCode getNextRowOrNextColumn(Cell cell) {
    // TODO maybe we can optimize.
    return MatchCode.SEEK_NEXT_COL;
  }
372
  /**
   * Always answers false, whatever the timestamp.
   *
   * @param timestamp not consulted by this implementation
   * @return false
   */
  @Override
  public boolean isDone(long timestamp) {
    // We can not skip Cells with small ts.
    return false;
  }
378
  /**
   * @return the cell comparator that was passed to the constructor
   */
  @Override
  public CellComparator getCellComparator() {
    return this.comparator;
  }
383
384}