/**
 * Appends one filter to {@code filterList} for every value held in {@code colWithOperatorsOfOR}.
 * A value whose operator is {@code CompareOp.EQUAL} contributes its own filter; any other value
 * is wrapped in a new {@code SingleColumnRangeFilter} built from the owning column, the value and
 * its operator, with {@code null} passed for the last two constructor arguments.
 *
 * @param filterList the list that receives the filters
 */
private void addORColsToFinalList(FilterList filterList) {
  for (Entry<Column, List<Value>> colAndValues : colWithOperatorsOfOR.entrySet()) {
    Column col = colAndValues.getKey();
    for (Value candidate : colAndValues.getValue()) {
      if (candidate.getOperator() != CompareOp.EQUAL) {
        // Non-equality operators are expressed as a range filter on the column.
        filterList.addFilter(new SingleColumnRangeFilter(col.getFamily(), col.getQualifier(),
            col.getValuePartition(), candidate.getValue(), candidate.getOperator(), null, null));
        continue;
      }
      filterList.addFilter(candidate.getFilter());
    }
  }
}
/**
 * Builds every combination of exactly {@code r} columns taken from {@code copyOfColumns}.
 * <p>
 * Each integer in {@code [0, 1 << size)} is treated as a bit mask over the column positions;
 * masks for which {@code findNoOfBitsSet} returns {@code r} produce one combination, made of the
 * columns whose bit is set, in list order.
 * <p>
 * NOTE(review): {@code 1 << size} is an {@code int} shift, so this only enumerates correctly
 * while the input has fewer than 31 columns.
 *
 * @param copyOfColumns the columns to choose from; not modified
 * @param r the number of columns each combination must contain
 * @param possibleCols2 not used by this method
 * @return a new list holding one independent list per combination
 */
private List<List<Column>> createCombination(LinkedList<Column> copyOfColumns, int r,
    ArrayList<List<Column>> possibleCols2) {
  // Snapshot into a random-access list: LinkedList.get(k) walks the list on every call.
  List<Column> columns = new ArrayList<Column>(copyOfColumns);
  List<List<Column>> possibleCols = new ArrayList<List<Column>>();
  int size = columns.size();
  for (int mask = 0; mask < (1 << size); mask++) {
    if (r != findNoOfBitsSet(mask)) {
      continue;
    }
    // A fresh list per combination avoids the clone()/unchecked-cast/clear() dance.
    List<Column> combination = new ArrayList<Column>();
    for (int bits = mask, k = 0; bits != 0; bits >>>= 1, k++) {
      if ((bits & 1) != 0) {
        combination.add(columns.get(k));
      }
    }
    possibleCols.add(combination);
  }
  return possibleCols;
}
/**
 * Converts a {@code SingleIndexExpression} holding one equals expression to bytes with
 * {@code IndexUtils.toBytes}, reads the bytes back through an {@code ObjectInputStream} and
 * checks that the index name, the number of equals expressions, the value and the column match.
 */
@Test
public void testConvertingSimpleIndexExpressionToByteArray() throws Exception {
  byte[] expectedValue = "1".getBytes();
  Column expectedColumn = new Column(FAMILY1, QUALIFIER1);
  SingleIndexExpression original = new SingleIndexExpression("idx1");
  original.addEqualsExpression(new EqualsExpression(expectedColumn, expectedValue));

  byte[] serialized = IndexUtils.toBytes(original);
  ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(serialized));
  SingleIndexExpression restored = (SingleIndexExpression) in.readObject();

  assertEquals("idx1", restored.getIndexName());
  assertEquals(1, restored.getEqualsExpressions().size());
  assertTrue(Bytes.equals(expectedValue, restored.getEqualsExpressions().get(0).getValue()));
  assertEquals(expectedColumn, restored.getEqualsExpressions().get(0).getColumn());
}
@Test public void testSingleIndexExpressionWithOneEqualsExpression() throws Exception { String indexName = "idx1"; SingleIndexExpression singleIndexExpression = new SingleIndexExpression(indexName); byte[] value = "1".getBytes(); Column column = new Column(FAMILY1, QUALIFIER1); EqualsExpression equalsExpression = new EqualsExpression(column, value); singleIndexExpression.addEqualsExpression(equalsExpression); Scan scan = new Scan(); scan.setAttribute(Constants.INDEX_EXPRESSION, IndexUtils.toBytes(singleIndexExpression)); Filter filter = new SingleColumnValueFilter(FAMILY1, QUALIFIER1, CompareOp.EQUAL, value); scan.setFilter(filter); ScanFilterEvaluator evaluator = new ScanFilterEvaluator(); List<IndexSpecification> indices = new ArrayList<IndexSpecification>(); IndexSpecification index = new IndexSpecification(indexName); HColumnDescriptor colDesc = new HColumnDescriptor(FAMILY1); index.addIndexColumn(colDesc, COL1, ValueType.String, 10); indices.add(index); HRegion region = initHRegion(tableName.getBytes(), null, null, "testSingleIndexExpressionWithOneEqualsExpression", TEST_UTIL.getConfiguration(), FAMILY1); IndexRegionScanner scanner = evaluator.evaluate(scan, indices, new byte[0], region, tableName); // TODO add assertions }
/**
 * Enumerates all ways of picking exactly {@code r} columns from {@code copyOfColumns}.
 * <p>
 * Every integer below {@code 1 << size} is read as a selection mask: bit {@code k} set means the
 * column at position {@code k} is part of the combination. Only masks for which
 * {@code findNoOfBitsSet} returns {@code r} are turned into a combination.
 * <p>
 * NOTE(review): the mask is an {@code int}, so inputs of 31 or more columns are not enumerated
 * correctly.
 *
 * @param copyOfColumns the candidate columns; left untouched
 * @param r the required size of each combination
 * @param possibleCols2 ignored by this method
 * @return a newly created list with one separate list per combination
 */
private List<List<Column>> createCombination(LinkedList<Column> copyOfColumns, int r,
    ArrayList<List<Column>> possibleCols2) {
  // Indexed access on a LinkedList is linear per call, so read positions from an ArrayList copy.
  List<Column> indexable = new ArrayList<Column>(copyOfColumns);
  int size = indexable.size();
  List<List<Column>> possibleCols = new ArrayList<List<Column>>();
  for (int i = 0; i < (1 << size); i++) {
    if (r != findNoOfBitsSet(i)) {
      continue;
    }
    // Build each combination in its own list instead of cloning and clearing a shared one.
    List<Column> picked = new ArrayList<Column>();
    int position = 0;
    for (int remaining = i; remaining != 0; remaining >>>= 1) {
      if ((remaining & 1) == 1) {
        picked.add(indexable.get(position));
      }
      position++;
    }
    possibleCols.add(picked);
  }
  return possibleCols;
}
/**
 * Creates a range condition on a column.
 * <p>
 * When the range is open at one end (for example no specific upper bound but only a lower bound),
 * pass the corresponding bound value as {@code null}. At least one of the two bounds must be
 * given. The bound arrays are stored as passed, without copying.
 *
 * @param column the column the range applies to; must not be {@code null}
 * @param lowerBoundValue the lower bound, or {@code null} when there is no lower bound
 * @param upperBoundValue the upper bound, or {@code null} when there is no upper bound
 * @param lowerBoundInclusive inclusive flag recorded for the lower bound
 * @param upperBoundInclusive inclusive flag recorded for the upper bound
 * @throws IllegalArgumentException if {@code column} is {@code null} or both bound values are
 *           {@code null}
 */
public RangeExpression(Column column, byte[] lowerBoundValue, byte[] upperBoundValue,
    boolean lowerBoundInclusive, boolean upperBoundInclusive) {
  if (column == null) {
    throw new IllegalArgumentException("column must not be null");
  }
  if (lowerBoundValue == null && upperBoundValue == null) {
    throw new IllegalArgumentException(
        "At least one of lowerBoundValue and upperBoundValue must be non-null");
  }
  this.column = column;
  this.lowerBoundValue = lowerBoundValue;
  this.upperBoundValue = upperBoundValue;
  this.lowerBoundInclusive = lowerBoundInclusive;
  this.upperBoundInclusive = upperBoundInclusive;
}
/**
 * Creates a detail from raw family/qualifier bytes. The {@code Column} is built from the family
 * and qualifier only.
 *
 * @param cf the column family
 * @param qualifier the column qualifier
 * @param value the value used in the comparison
 * @param compareOp the comparison operator
 */
public FilterColumnValueDetail(byte[] cf, byte[] qualifier, byte[] value, CompareOp compareOp) {
  this.column = new Column(cf, qualifier);
  this.cf = cf;
  this.qualifier = qualifier;
  this.compareOp = compareOp;
  this.value = value;
}
/**
 * Creates a detail from raw family/qualifier bytes plus a value partition. The {@code Column} is
 * built from the family, the qualifier and {@code valuePartition}.
 *
 * @param cf the column family
 * @param qualifier the column qualifier
 * @param value the value used in the comparison
 * @param valuePartition the value partition handed to the {@code Column}
 * @param compareOp the comparison operator
 */
public FilterColumnValueDetail(byte[] cf, byte[] qualifier, byte[] value,
    ValuePartition valuePartition, CompareOp compareOp) {
  this.column = new Column(cf, qualifier, valuePartition);
  this.cf = cf;
  this.qualifier = qualifier;
  this.compareOp = compareOp;
  this.value = value;
}
public FilterColumnValueDetail(Column column, byte[] value, CompareOp compareOp) { this.cf = column.getFamily(); this.qualifier = column.getQualifier(); this.value = value; this.compareOp = compareOp; this.column = column; }
private FilterNode handleANDCondition(NonLeafFilterNode nonLeafFilterNode) { Map<Column, LeafFilterNode> leafNodes = new HashMap<Column, LeafFilterNode>(); Iterator<FilterNode> filterNodesIterator = nonLeafFilterNode.getFilterNodes().iterator(); while (filterNodesIterator.hasNext()) { FilterNode filterNode = filterNodesIterator.next(); if (filterNode instanceof LeafFilterNode) { LeafFilterNode lfNode = (LeafFilterNode) filterNode; leafNodes.put(lfNode.getFilterColumnValueDetail().column, lfNode); filterNodesIterator.remove(); } else if (filterNode instanceof NoIndexFilterNode) { filterNodesIterator.remove(); } // Any NonLeafFilterNode under this NonLeafFilterNode will be kept as it is. // This will be a OR condition corresponding node. } // This below method will consider all the leafNodes just under that and will try to // finalize one or more index to be used for those col. It will try to get the best // combination minimizing the number of indices to be used. If I have say 5 leaf cols // under this AND node and there is one index on these 5 cols, well I can use that one // index. If not will try to find indices which can be applied on the subsets of these // 5 cols, say one on 3 cols and other on 2 cols if (!leafNodes.isEmpty()) { Map<List<Column>, IndexSpecification> colVsIndex = finalizeIndexForLeafNodes(leafNodes); if (LOG.isDebugEnabled()) { LOG.debug("Index(s) which will be used for columns " + leafNodes.keySet() + " : " + colVsIndex); } if (colVsIndex != null) { addIndicesToNonLeafAndNode(colVsIndex, nonLeafFilterNode, leafNodes); } } return nonLeafFilterNode; }
@SuppressWarnings("unchecked") private Map<List<Column>, IndexSpecification> finalizeIndexForLeafNodes( Map<Column, LeafFilterNode> leafNodes) { // go with the breakups and check // suppose there are 5 cols under the AND condition and are c1,c2,c3,c4,c5 // There can be different break ups for the cols possible. // [5],[4,1],[3,2],[3,1,1],[2,2,1],[2,1,1,1],[1,1,1,1,1] // In each of these breakup also we can get many columns combinations. // Except in first and last where either all cols in one group or 1 column only. // For [4,1] there can be 5c1 combinations possible. Set<List<List<List<Column>>>> colBreakUps = getColsBreakUps(leafNodes.keySet()); ColBreakUpIndexDetails bestColBreakUpIndexDetails = null; for (List<List<List<Column>>> colBreakUp : colBreakUps) { ColBreakUpIndexDetails colBreakUpIndexDetails = findBestIndicesForColSplitsInBreakUp(colBreakUp, leafNodes); if (colBreakUpIndexDetails == null) { continue; } if (colBreakUpIndexDetails.isBestIndex) { // This means this is THE best index. It solves all the columns and exactly those cols only // there as part of the indices too.. What else we need... bestColBreakUpIndexDetails = colBreakUpIndexDetails; break; } else { if (bestColBreakUpIndexDetails == null || isIndicesGroupBetterThanCurBest(colBreakUpIndexDetails, bestColBreakUpIndexDetails)) { bestColBreakUpIndexDetails = colBreakUpIndexDetails; } } } // TODO some more logging of the output.. return bestColBreakUpIndexDetails == null ? null : bestColBreakUpIndexDetails.bestIndicesForBreakUp; }
private void addIndicesToNonLeafAndNode(Map<List<Column>, IndexSpecification> colsVsIndex, NonLeafFilterNode nonLeafFilterNode, Map<Column, LeafFilterNode> leafNodes) { for (Entry<List<Column>, IndexSpecification> entry : colsVsIndex.entrySet()) { List<Column> cols = entry.getKey(); int colsSize = cols.size(); IndexSpecification index = entry.getValue(); // The FilterColumnValueDetail for cols need to be in the same order as that of cols // in the index. This order will be important for creating the start/stop keys for // index scan. List<FilterColumnValueDetail> fcvds = new ArrayList<FilterColumnValueDetail>(colsSize); int i = 0; for (ColumnQualifier cq : index.getIndexColumns()) { FilterColumnValueDetail fcvd = leafNodes.get( new Column(cq.getColumnFamily(), cq.getQualifier(), cq.getValuePartition())) .getFilterColumnValueDetail(); assert fcvd != null; fcvds.add(fcvd); i++; if (i == colsSize) { // The selected index might be on more cols than those we are interested in now. // All those will be towards the end. break; } } LOG.info("Index using for the columns " + cols + " : " + index); nonLeafFilterNode.addIndicesToUse(fcvds, index); } }
private IndexSpecification findBestIndex(List<Column> cols, Map<Column, LeafFilterNode> leafNodes) { if (LOG.isDebugEnabled()) { LOG.debug("Trying to find a best index for the cols : " + cols); } Set<IndexSpecification> indicesToUse = getPossibleIndicesForCols(cols, leafNodes); // indicesToUse will never come as null.... if (LOG.isDebugEnabled()) { LOG.debug("Possible indices for cols " + cols + " : " + indicesToUse); } IndexSpecification bestIndex = null; int bestIndexCardinality = -1; for (IndexSpecification index : indicesToUse) { if (isIndexSuitable(index, cols, leafNodes)) { if (LOG.isDebugEnabled()) { LOG.debug("Index " + index + " seems to be suitable for the columns " + cols); } if (index.getIndexColumns().size() == cols.size()) { // Yea we got the best index. Juts return this. No need to loop through and check // with other indices return index; } // Compare this index with the current best. This will be better if its cardinality // is better(lesser) than the current best's // TODO pluggable interface to decide which index to be used when both this and current // best index having same cardinality. if (bestIndex == null || index.getIndexColumns().size() < bestIndexCardinality) { bestIndex = index; bestIndexCardinality = index.getIndexColumns().size(); } } } return bestIndex; }
private boolean isIndexSuitable(IndexSpecification index, List<Column> cols, Map<Column, LeafFilterNode> leafNodes) { int matchedCols = 0; for (ColumnQualifier cq : index.getIndexColumns()) { Column column = new Column(cq.getColumnFamily(), cq.getQualifier(), cq.getValuePartition()); if (cols.contains(column)) { matchedCols++; // leafNodes.get(column) will never be null.. Don't worry if (leafNodes.get(column).getFilterColumnValueDetail() instanceof FilterColumnValueRange) { // When the condition on the column is a range condition, we need to ensure in this index // 1. The column is the last column // or // 2. There are no columns in this index which is part of the cols list if (matchedCols != cols.size()) { return false; } } } else { if (matchedCols != cols.size()) { return false; } } if (matchedCols == cols.size()) { return true; } } return false; }
@Override public Map<Column, List<Pair<IndexSpecification, Integer>>> getPossibleFutureUseIndices() { // TODO avoid create of Map instance all the time... Map<Column, List<Pair<IndexSpecification, Integer>>> reply = new HashMap<Column, List<Pair<IndexSpecification, Integer>>>(); reply.put(filterColumnValueDetail.getColumn(), possibleFutureUseIndices); return reply; }
/** * all possible indices which can be used. This includes the selected indexToUse also. This * contains the an integer as the second item in the Pair. This is the relative overhead in * scanning the index region. The lesser the value the lesser the overhead in scanning the index * region. This will be set with the number of columns in the index specification. * @return all possible indices can be used. */ @Override public Map<Column, List<Pair<IndexSpecification, Integer>>> getPossibleUseIndices() { // TODO avoid create of Map instance all the time... Map<Column, List<Pair<IndexSpecification, Integer>>> reply = new HashMap<Column, List<Pair<IndexSpecification, Integer>>>(); reply.put(filterColumnValueDetail.getColumn(), possibleUseIndices); return reply; }
/**
 * Builds a {@code SingleIndexExpression} named "idx1" with one equals expression on
 * FAMILY1/QUALIFIER1.
 */
@Test
public void testConvertingBytesIntoIndexExpression() throws Exception {
  byte[] value = "1".getBytes();
  Column column = new Column(FAMILY1, QUALIFIER1);
  SingleIndexExpression expression = new SingleIndexExpression("idx1");
  expression.addEqualsExpression(new EqualsExpression(column, value));
  // NOTE(review): nothing is converted or asserted here yet; the test only passes if the
  // construction above does not throw.
}
@Override public void preCreateTable(ObserverContext<MasterCoprocessorEnvironment> ctx, HTableDescriptor desc, HRegionInfo[] regions) throws IOException { LOG.info("Entered into preCreateTable."); MasterServices master = ctx.getEnvironment().getMasterServices(); if (desc instanceof IndexedHTableDescriptor) { Map<Column, Pair<ValueType, Integer>> indexColDetails = new HashMap<Column, Pair<ValueType, Integer>>(); String tableName = desc.getNameAsString(); checkEndsWithIndexSuffix(tableName); String indexTableName = IndexUtils.getIndexTableName(tableName); List<IndexSpecification> indices = ((IndexedHTableDescriptor) desc).getIndices(); // Even if indices list is empty,it will create index table also. if (indices.isEmpty()) { if (LOG.isDebugEnabled()) { LOG.debug("Empty indices. Index table may not created" + " if master goes down in between user table creation"); } } LOG.trace("Checking whether column families in " + "index specification are in actual table column familes."); for (IndexSpecification iSpec : indices) { checkColumnsForValidityAndConsistency(desc, iSpec, indexColDetails); } LOG.trace("Column families in index specifications " + "are in actual table column familes."); boolean isTableExists = MetaReader.tableExists(master.getCatalogTracker(), tableName); boolean isIndexTableExists = MetaReader.tableExists(master.getCatalogTracker(), indexTableName); if (isTableExists && isIndexTableExists) { throw new TableExistsException("Table " + tableName + " already exist."); } else if (isIndexTableExists) { disableAndDeleteTable(master, indexTableName); } } LOG.info("Exiting from preCreateTable."); }
/**
 * Validates the columns of one index specification.
 * <p>
 * Each index column must belong to a column family present in {@code desc}. In addition, a
 * column that was already recorded in {@code indexColDetails} must have the same value type and
 * max value length as recorded; columns seen for the first time are recorded.
 *
 * @param desc the descriptor of the user table
 * @param iSpec the index specification to check
 * @param indexColDetails value type and max value length per column seen so far; updated here
 * @throws IOException wrapping an {@code IllegalArgumentException} when a column family is not
 *           part of the table, or directly when type/length differ from what was recorded
 */
private void checkColumnsForValidityAndConsistency(HTableDescriptor desc,
    IndexSpecification iSpec, Map<Column, Pair<ValueType, Integer>> indexColDetails)
    throws IOException {
  for (ColumnQualifier cq : iSpec.getIndexColumns()) {
    if (desc.getFamily(cq.getColumnFamily()) == null) {
      String message =
          "Column family " + cq.getColumnFamilyString() + " in index specification "
              + iSpec.getName() + " not in Column families of table " + desc.getNameAsString()
              + '.';
      LOG.error(message);
      throw new IOException(new IllegalArgumentException(message));
    }
    Column column = new Column(cq.getColumnFamily(), cq.getQualifier(), cq.getValuePartition());
    ValueType type = cq.getType();
    int maxlength = cq.getMaxValueLength();
    Pair<ValueType, Integer> recorded = indexColDetails.get(column);
    if (recorded == null) {
      // First time this column is seen: remember its definition.
      indexColDetails.put(column, new Pair<ValueType, Integer>(type, maxlength));
      continue;
    }
    boolean consistent = recorded.getFirst().equals(type) && recorded.getSecond() == maxlength;
    if (!consistent) {
      throw new IOException("ValueType/max value length of column " + column
          + " not consistent across the indices");
    }
  }
}
/** * all possible indices which can be used. This includes the selected indexToUse also. This * contains the an integer as the second item in the Pair. This is the relative overhead in * scanning the index region. The lesser the value the lesser the overhead in scanning the index * region. This will be set with the number of columns in the index specification. * @return */ @Override public Map<Column, List<Pair<IndexSpecification, Integer>>> getPossibleUseIndices() { // TODO avoid create of Map instance all the time... Map<Column, List<Pair<IndexSpecification, Integer>>> reply = new HashMap<Column, List<Pair<IndexSpecification, Integer>>>(); reply.put(filterColumnValueDetail.getColumn(), possibleUseIndices); return reply; }
/**
 * @return the column held by this object
 */
public Column getColumn() {
  return this.column;
}
/**
 * Creates an equals condition. Both arguments are stored as passed; the value array is not
 * copied.
 *
 * @param column the column the condition applies to
 * @param value the value the column is compared with
 */
public EqualsExpression(Column column, byte[] value) {
  this.value = value;
  this.column = column;
}
/**
 * @return the column stored in this object
 */
public Column getColumn() {
  return column;
}
@Override public void preCreateTable(ObserverContext<MasterCoprocessorEnvironment> ctx, HTableDescriptor desc, HRegionInfo[] regions) throws IOException { LOG.info("Entered into preCreateTable."); MasterServices master = ctx.getEnvironment().getMasterServices(); byte[] indexBytes = desc.getValue(Constants.INDEX_SPEC_KEY); if (indexBytes != null) { Map<Column, Pair<ValueType, Integer>> indexColDetails = new HashMap<Column, Pair<ValueType, Integer>>(); TableName tableName = desc.getTableName(); checkEndsWithIndexSuffix(tableName); TableName indexTableName = TableName.valueOf(IndexUtils.getIndexTableName(tableName)); TableIndices tableIndices = new TableIndices(); tableIndices.readFields(indexBytes); List<IndexSpecification> indices = tableIndices.getIndices(); // Even if indices list is empty,it will create index table also. if (indices.isEmpty()) { if (LOG.isDebugEnabled()) { LOG.debug("Empty indices. Index table may not created" + " if master goes down in between user table creation"); } } LOG.trace("Checking whether column families in " + "index specification are in actual table column familes."); for (IndexSpecification iSpec : indices) { checkColumnsForValidityAndConsistency(desc, iSpec, indexColDetails); } LOG.trace("Column families in index specifications " + "are in actual table column familes."); boolean isTableExists = MetaReader.tableExists(master.getCatalogTracker(), desc.getTableName()); boolean isIndexTableExists = MetaReader.tableExists(master.getCatalogTracker(), indexTableName); if (isTableExists && isIndexTableExists) { throw new TableExistsException(desc.getTableName()); } else if (isIndexTableExists) { disableAndDeleteTable(master, indexTableName); } idxManager.addIndexForTable(desc.getNameAsString(), indices); } LOG.info("Exiting from preCreateTable."); }