001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *    http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.hadoop.hbase.spark;
019
020import org.apache.hadoop.hbase.Cell;
021import org.apache.yetus.audience.InterfaceAudience;
022import org.apache.hadoop.hbase.exceptions.DeserializationException;
023import org.apache.hadoop.hbase.filter.FilterBase;
024import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
025import org.apache.hadoop.hbase.spark.datasources.BytesEncoder;
026import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder;
027import org.apache.hadoop.hbase.spark.protobuf.generated.SparkFilterProtos;
028import org.apache.hadoop.hbase.util.ByteStringer;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.spark.sql.datasources.hbase.Field;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033import scala.collection.mutable.MutableList;
034
035
036import java.io.IOException;
037import java.util.HashMap;
038import java.util.List;
039import java.util.Map;
040import com.google.protobuf.InvalidProtocolBufferException;
041import com.google.protobuf.ByteString;
042
043/**
044 * This filter will push down all qualifier logic given to us
045 * by SparkSQL so that we have make the filters at the region server level
046 * and avoid sending the data back to the client to be filtered.
047 */
048@InterfaceAudience.Private
049public class SparkSQLPushDownFilter extends FilterBase{
050  protected static final Logger log = LoggerFactory.getLogger(SparkSQLPushDownFilter.class);
051
052  //The following values are populated with protobuffer
053  DynamicLogicExpression dynamicLogicExpression;
054  byte[][] valueFromQueryArray;
055  HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
056          currentCellToColumnIndexMap;
057
058  //The following values are transient
059  HashMap<String, ByteArrayComparable> columnToCurrentRowValueMap = null;
060
061  static final byte[] rowKeyFamily = new byte[0];
062  static final byte[] rowKeyQualifier = Bytes.toBytes("key");
063
064  String encoderClassName;
065
066  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
067                                byte[][] valueFromQueryArray,
068                                HashMap<ByteArrayComparable,
069                                        HashMap<ByteArrayComparable, String>>
070                                        currentCellToColumnIndexMap, String encoderClassName) {
071    this.dynamicLogicExpression = dynamicLogicExpression;
072    this.valueFromQueryArray = valueFromQueryArray;
073    this.currentCellToColumnIndexMap = currentCellToColumnIndexMap;
074    this.encoderClassName = encoderClassName;
075  }
076
077  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
078                                byte[][] valueFromQueryArray,
079                                MutableList<Field> fields, String encoderClassName) {
080    this.dynamicLogicExpression = dynamicLogicExpression;
081    this.valueFromQueryArray = valueFromQueryArray;
082    this.encoderClassName = encoderClassName;
083
084    //generate family qualifier to index mapping
085    this.currentCellToColumnIndexMap =
086            new HashMap<>();
087
088    for (int i = 0; i < fields.size(); i++) {
089      Field field = fields.apply(i);
090
091      byte[] cfBytes = field.cfBytes();
092      ByteArrayComparable familyByteComparable =
093          new ByteArrayComparable(cfBytes, 0, cfBytes.length);
094
095      HashMap<ByteArrayComparable, String> qualifierIndexMap =
096              currentCellToColumnIndexMap.get(familyByteComparable);
097
098      if (qualifierIndexMap == null) {
099        qualifierIndexMap = new HashMap<>();
100        currentCellToColumnIndexMap.put(familyByteComparable, qualifierIndexMap);
101      }
102      byte[] qBytes = field.colBytes();
103      ByteArrayComparable qualifierByteComparable =
104          new ByteArrayComparable(qBytes, 0, qBytes.length);
105
106      qualifierIndexMap.put(qualifierByteComparable, field.colName());
107    }
108  }
109
110  @Override
111  public ReturnCode filterCell(final Cell c) throws IOException {
112
113    //If the map RowValueMap is empty then we need to populate
114    // the row key
115    if (columnToCurrentRowValueMap == null) {
116      columnToCurrentRowValueMap = new HashMap<>();
117      HashMap<ByteArrayComparable, String> qualifierColumnMap =
118              currentCellToColumnIndexMap.get(
119                      new ByteArrayComparable(rowKeyFamily, 0, rowKeyFamily.length));
120
121      if (qualifierColumnMap != null) {
122        String rowKeyColumnName =
123                qualifierColumnMap.get(
124                        new ByteArrayComparable(rowKeyQualifier, 0,
125                                rowKeyQualifier.length));
126        //Make sure that the rowKey is part of the where clause
127        if (rowKeyColumnName != null) {
128          columnToCurrentRowValueMap.put(rowKeyColumnName,
129                  new ByteArrayComparable(c.getRowArray(),
130                          c.getRowOffset(), c.getRowLength()));
131        }
132      }
133    }
134
135    //Always populate the column value into the RowValueMap
136    ByteArrayComparable currentFamilyByteComparable =
137            new ByteArrayComparable(c.getFamilyArray(),
138            c.getFamilyOffset(),
139            c.getFamilyLength());
140
141    HashMap<ByteArrayComparable, String> qualifierColumnMap =
142            currentCellToColumnIndexMap.get(
143                    currentFamilyByteComparable);
144
145    if (qualifierColumnMap != null) {
146
147      String columnName =
148              qualifierColumnMap.get(
149                      new ByteArrayComparable(c.getQualifierArray(),
150                              c.getQualifierOffset(),
151                              c.getQualifierLength()));
152
153      if (columnName != null) {
154        columnToCurrentRowValueMap.put(columnName,
155                new ByteArrayComparable(c.getValueArray(),
156                        c.getValueOffset(), c.getValueLength()));
157      }
158    }
159
160    return ReturnCode.INCLUDE;
161  }
162
163
164  @Override
165  public boolean filterRow() throws IOException {
166
167    try {
168      boolean result =
169              dynamicLogicExpression.execute(columnToCurrentRowValueMap,
170                      valueFromQueryArray);
171      columnToCurrentRowValueMap = null;
172      return !result;
173    } catch (Throwable e) {
174      log.error("Error running dynamic logic on row", e);
175    }
176    return false;
177  }
178
179
180  /**
181   * @param pbBytes A pb serialized instance
182   * @return An instance of SparkSQLPushDownFilter
183   * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
184   */
185  @SuppressWarnings("unused")
186  public static SparkSQLPushDownFilter parseFrom(final byte[] pbBytes)
187          throws DeserializationException {
188
189    SparkFilterProtos.SQLPredicatePushDownFilter proto;
190    try {
191      proto = SparkFilterProtos.SQLPredicatePushDownFilter.parseFrom(pbBytes);
192    } catch (InvalidProtocolBufferException e) {
193      throw new DeserializationException(e);
194    }
195
196    String encoder = proto.getEncoderClassName();
197    BytesEncoder enc = JavaBytesEncoder.create(encoder);
198
199    //Load DynamicLogicExpression
200    DynamicLogicExpression dynamicLogicExpression =
201            DynamicLogicExpressionBuilder.build(proto.getDynamicLogicExpression(), enc);
202
203    //Load valuesFromQuery
204    final List<ByteString> valueFromQueryArrayList = proto.getValueFromQueryArrayList();
205    byte[][] valueFromQueryArray = new byte[valueFromQueryArrayList.size()][];
206    for (int i = 0; i < valueFromQueryArrayList.size(); i++) {
207      valueFromQueryArray[i] = valueFromQueryArrayList.get(i).toByteArray();
208    }
209
210    //Load mapping from HBase family/qualifier to Spark SQL columnName
211    HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
212            currentCellToColumnIndexMap = new HashMap<>();
213
214    for (SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping
215            sqlPredicatePushDownCellToColumnMapping :
216            proto.getCellToColumnMappingList()) {
217
218      byte[] familyArray =
219              sqlPredicatePushDownCellToColumnMapping.getColumnFamily().toByteArray();
220      ByteArrayComparable familyByteComparable =
221              new ByteArrayComparable(familyArray, 0, familyArray.length);
222      HashMap<ByteArrayComparable, String> qualifierMap =
223              currentCellToColumnIndexMap.get(familyByteComparable);
224
225      if (qualifierMap == null) {
226        qualifierMap = new HashMap<>();
227        currentCellToColumnIndexMap.put(familyByteComparable, qualifierMap);
228      }
229      byte[] qualifierArray =
230              sqlPredicatePushDownCellToColumnMapping.getQualifier().toByteArray();
231
232      ByteArrayComparable qualifierByteComparable =
233              new ByteArrayComparable(qualifierArray, 0 ,qualifierArray.length);
234
235      qualifierMap.put(qualifierByteComparable,
236              sqlPredicatePushDownCellToColumnMapping.getColumnName());
237    }
238
239    return new SparkSQLPushDownFilter(dynamicLogicExpression,
240            valueFromQueryArray, currentCellToColumnIndexMap, encoder);
241  }
242
243  /**
244   * @return The filter serialized using pb
245   */
246  public byte[] toByteArray() {
247
248    SparkFilterProtos.SQLPredicatePushDownFilter.Builder builder =
249            SparkFilterProtos.SQLPredicatePushDownFilter.newBuilder();
250
251    SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.Builder columnMappingBuilder =
252            SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.newBuilder();
253
254    builder.setDynamicLogicExpression(dynamicLogicExpression.toExpressionString());
255    for (byte[] valueFromQuery: valueFromQueryArray) {
256      builder.addValueFromQueryArray(ByteStringer.wrap(valueFromQuery));
257    }
258
259    for (Map.Entry<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
260            familyEntry : currentCellToColumnIndexMap.entrySet()) {
261      for (Map.Entry<ByteArrayComparable, String> qualifierEntry :
262              familyEntry.getValue().entrySet()) {
263        columnMappingBuilder.setColumnFamily(
264                ByteStringer.wrap(familyEntry.getKey().bytes()));
265        columnMappingBuilder.setQualifier(
266                ByteStringer.wrap(qualifierEntry.getKey().bytes()));
267        columnMappingBuilder.setColumnName(qualifierEntry.getValue());
268        builder.addCellToColumnMapping(columnMappingBuilder.build());
269      }
270    }
271    builder.setEncoderClassName(encoderClassName);
272
273
274    return builder.build().toByteArray();
275  }
276}