/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.spark;

import org.apache.hadoop.hbase.Cell;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
import org.apache.hadoop.hbase.spark.datasources.BytesEncoder;
import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder;
import org.apache.hadoop.hbase.spark.protobuf.generated.SparkFilterProtos;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.sql.datasources.hbase.Field;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.mutable.MutableList;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.ByteString;
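/*
 * Illustrative sketch only: roughly how a Spark-side caller might attach this filter
 * to a Scan. The expression string, query values, field list and encoder name are
 * hypothetical placeholders; the real wiring lives in the connector's Spark code.
 *
 *   DynamicLogicExpression expr =
 *       DynamicLogicExpressionBuilder.build("columnA > 0",
 *           JavaBytesEncoder.create(encoderName));
 *   SparkSQLPushDownFilter pushDown =
 *       new SparkSQLPushDownFilter(expr, queryValues, fields, encoderName);
 *   scan.setFilter(pushDown);   // predicate is now evaluated in the region server
 */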
/**
 * This filter will push down all of the qualifier logic given to us
 * by Spark SQL so that the filtering can be done at the region server level,
 * avoiding the need to send the data back to the client to be filtered.
 */
@InterfaceAudience.Private
public class SparkSQLPushDownFilter extends FilterBase {
  protected static final Logger log = LoggerFactory.getLogger(SparkSQLPushDownFilter.class);

  // The following values are populated from the protobuf (see parseFrom/toByteArray)
  DynamicLogicExpression dynamicLogicExpression;
  byte[][] valueFromQueryArray;
  HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
          currentCellToColumnIndexMap;

  // The following values are transient
  HashMap<String, ByteArrayComparable> columnToCurrentRowValueMap = null;

  static final byte[] rowKeyFamily = new byte[0];
  static final byte[] rowKeyQualifier = Bytes.toBytes("key");

  String encoderClassName;

  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
                                byte[][] valueFromQueryArray,
                                HashMap<ByteArrayComparable,
                                        HashMap<ByteArrayComparable, String>>
                                        currentCellToColumnIndexMap, String encoderClassName) {
    this.dynamicLogicExpression = dynamicLogicExpression;
    this.valueFromQueryArray = valueFromQueryArray;
    this.currentCellToColumnIndexMap = currentCellToColumnIndexMap;
    this.encoderClassName = encoderClassName;
  }

  public SparkSQLPushDownFilter(DynamicLogicExpression dynamicLogicExpression,
                                byte[][] valueFromQueryArray,
                                MutableList<Field> fields, String encoderClassName) {
    this.dynamicLogicExpression = dynamicLogicExpression;
    this.valueFromQueryArray = valueFromQueryArray;
    this.encoderClassName = encoderClassName;

    // Generate the family -> qualifier -> column name index mapping
    this.currentCellToColumnIndexMap = new HashMap<>();

    for (int i = 0; i < fields.size(); i++) {
      Field field = fields.apply(i);

      byte[] cfBytes = field.cfBytes();
      ByteArrayComparable familyByteComparable =
          new ByteArrayComparable(cfBytes, 0, cfBytes.length);

      HashMap<ByteArrayComparable, String> qualifierIndexMap =
          currentCellToColumnIndexMap.get(familyByteComparable);

      if (qualifierIndexMap == null) {
        qualifierIndexMap = new HashMap<>();
        currentCellToColumnIndexMap.put(familyByteComparable, qualifierIndexMap);
      }
      byte[] qBytes = field.colBytes();
      ByteArrayComparable qualifierByteComparable =
          new ByteArrayComparable(qBytes, 0, qBytes.length);

      qualifierIndexMap.put(qualifierByteComparable, field.colName());
    }
  }

  @Override
  public ReturnCode filterCell(final Cell c) throws IOException {

    // If the row value map is null we are at the start of a new row,
    // so populate it with the row key first
    if (columnToCurrentRowValueMap == null) {
      columnToCurrentRowValueMap = new HashMap<>();
      HashMap<ByteArrayComparable, String> qualifierColumnMap =
          currentCellToColumnIndexMap.get(
              new ByteArrayComparable(rowKeyFamily, 0, rowKeyFamily.length));

      if (qualifierColumnMap != null) {
        String rowKeyColumnName =
            qualifierColumnMap.get(
                new ByteArrayComparable(rowKeyQualifier, 0,
                    rowKeyQualifier.length));
        // Make sure that the row key is part of the where clause
        if (rowKeyColumnName != null) {
          columnToCurrentRowValueMap.put(rowKeyColumnName,
              new ByteArrayComparable(c.getRowArray(),
                  c.getRowOffset(), c.getRowLength()));
        }
      }
    }

    // Always populate the column value into the row value map
    ByteArrayComparable currentFamilyByteComparable =
        new ByteArrayComparable(c.getFamilyArray(),
            c.getFamilyOffset(),
            c.getFamilyLength());
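    // currentCellToColumnIndexMap is a two-level lookup: column family -> qualifier
    // -> Spark SQL column name. Only cells present in that map take part in the
    // pushed-down predicate; their values are recorded under the Spark SQL column
    // name so filterRow() can evaluate the expression once the whole row is seen.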
    HashMap<ByteArrayComparable, String> qualifierColumnMap =
        currentCellToColumnIndexMap.get(currentFamilyByteComparable);

    if (qualifierColumnMap != null) {

      String columnName =
          qualifierColumnMap.get(
              new ByteArrayComparable(c.getQualifierArray(),
                  c.getQualifierOffset(),
                  c.getQualifierLength()));

      if (columnName != null) {
        columnToCurrentRowValueMap.put(columnName,
            new ByteArrayComparable(c.getValueArray(),
                c.getValueOffset(), c.getValueLength()));
      }
    }

    return ReturnCode.INCLUDE;
  }

  @Override
  public boolean filterRow() throws IOException {

    try {
      boolean result =
          dynamicLogicExpression.execute(columnToCurrentRowValueMap,
              valueFromQueryArray);
      // Reset the row value map for the next row. filterRow() returns true to
      // exclude a row, so invert the result of the expression.
      columnToCurrentRowValueMap = null;
      return !result;
    } catch (Throwable e) {
      log.error("Error running dynamic logic on row", e);
    }
    // On error, keep the row rather than silently dropping data
    return false;
  }

  /**
   * @param pbBytes A pb serialized instance
   * @return An instance of SparkSQLPushDownFilter
   * @throws org.apache.hadoop.hbase.exceptions.DeserializationException
   *           if the byte array cannot be parsed
   */
  @SuppressWarnings("unused")
  public static SparkSQLPushDownFilter parseFrom(final byte[] pbBytes)
      throws DeserializationException {

    SparkFilterProtos.SQLPredicatePushDownFilter proto;
    try {
      proto = SparkFilterProtos.SQLPredicatePushDownFilter.parseFrom(pbBytes);
    } catch (InvalidProtocolBufferException e) {
      throw new DeserializationException(e);
    }

    String encoder = proto.getEncoderClassName();
    BytesEncoder enc = JavaBytesEncoder.create(encoder);

    // Load the DynamicLogicExpression
    DynamicLogicExpression dynamicLogicExpression =
        DynamicLogicExpressionBuilder.build(proto.getDynamicLogicExpression(), enc);

    // Load the values from the query
    final List<ByteString> valueFromQueryArrayList = proto.getValueFromQueryArrayList();
    byte[][] valueFromQueryArray = new byte[valueFromQueryArrayList.size()][];
    for (int i = 0; i < valueFromQueryArrayList.size(); i++) {
      valueFromQueryArray[i] = valueFromQueryArrayList.get(i).toByteArray();
    }

    // Load the mapping from HBase family/qualifier to Spark SQL column name
    HashMap<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
        currentCellToColumnIndexMap = new HashMap<>();

    for (SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping
        sqlPredicatePushDownCellToColumnMapping :
        proto.getCellToColumnMappingList()) {

      byte[] familyArray =
          sqlPredicatePushDownCellToColumnMapping.getColumnFamily().toByteArray();
      ByteArrayComparable familyByteComparable =
          new ByteArrayComparable(familyArray, 0, familyArray.length);
      HashMap<ByteArrayComparable, String> qualifierMap =
          currentCellToColumnIndexMap.get(familyByteComparable);

      if (qualifierMap == null) {
        qualifierMap = new HashMap<>();
        currentCellToColumnIndexMap.put(familyByteComparable, qualifierMap);
      }
      byte[] qualifierArray =
          sqlPredicatePushDownCellToColumnMapping.getQualifier().toByteArray();

      ByteArrayComparable qualifierByteComparable =
          new ByteArrayComparable(qualifierArray, 0, qualifierArray.length);

      qualifierMap.put(qualifierByteComparable,
          sqlPredicatePushDownCellToColumnMapping.getColumnName());
    }

    return new SparkSQLPushDownFilter(dynamicLogicExpression,
        valueFromQueryArray, currentCellToColumnIndexMap, encoder);
  }
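  /*
   * For reference, the wire format handled by parseFrom() and toByteArray() is the
   * SparkFilterProtos.SQLPredicatePushDownFilter protobuf message. The sketch below
   * is reconstructed from the accessors used in this class; the field numbers and
   * required/optional labels are assumptions, not copied from the actual .proto file.
   *
   *   message SQLPredicatePushDownCellToColumnMapping {
   *     required bytes column_family = 1;
   *     required bytes qualifier = 2;
   *     required string column_name = 3;
   *   }
   *   message SQLPredicatePushDownFilter {
   *     required string dynamic_logic_expression = 1;
   *     repeated bytes value_from_query_array = 2;
   *     repeated SQLPredicatePushDownCellToColumnMapping cell_to_column_mapping = 3;
   *     optional string encoder_class_name = 4;
   *   }
   */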
  /**
   * @return The filter serialized using pb
   */
  public byte[] toByteArray() {

    SparkFilterProtos.SQLPredicatePushDownFilter.Builder builder =
        SparkFilterProtos.SQLPredicatePushDownFilter.newBuilder();

    SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.Builder columnMappingBuilder =
        SparkFilterProtos.SQLPredicatePushDownCellToColumnMapping.newBuilder();

    builder.setDynamicLogicExpression(dynamicLogicExpression.toExpressionString());
    for (byte[] valueFromQuery : valueFromQueryArray) {
      builder.addValueFromQueryArray(ByteStringer.wrap(valueFromQuery));
    }

    // Flatten the two-level family -> qualifier -> column name map into a list of
    // (family, qualifier, columnName) entries, reusing a single mapping builder.
    for (Map.Entry<ByteArrayComparable, HashMap<ByteArrayComparable, String>>
        familyEntry : currentCellToColumnIndexMap.entrySet()) {
      for (Map.Entry<ByteArrayComparable, String> qualifierEntry :
          familyEntry.getValue().entrySet()) {
        columnMappingBuilder.setColumnFamily(
            ByteStringer.wrap(familyEntry.getKey().bytes()));
        columnMappingBuilder.setQualifier(
            ByteStringer.wrap(qualifierEntry.getKey().bytes()));
        columnMappingBuilder.setColumnName(qualifierEntry.getValue());
        builder.addCellToColumnMapping(columnMappingBuilder.build());
      }
    }
    builder.setEncoderClassName(encoderClassName);

    return builder.build().toByteArray();
  }
}
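/*
 * A minimal round-trip sketch of the serialization path above: a filter built on the
 * client side is converted to protobuf bytes with toByteArray() and rebuilt inside
 * the region server with parseFrom(). The variables expr, queryValues, fields and
 * encoderName are placeholders for values the connector would normally supply.
 *
 *   SparkSQLPushDownFilter original =
 *       new SparkSQLPushDownFilter(expr, queryValues, fields, encoderName);
 *   byte[] wire = original.toByteArray();
 *   SparkSQLPushDownFilter restored = SparkSQLPushDownFilter.parseFrom(wire);
 */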