/**
 * Breadth-first traversal starting at vertex {@code s}.
 * Every vertex reached is flagged in {@code visited}, and for each newly
 * discovered vertex {@code edgeTo} records the vertex it was discovered from.
 *
 * @param graph graph whose adjacency lists are walked
 * @param s     start vertex
 */
private void bfs(Graph graph, int s) {
    IntArrayFIFOQueue frontier = new IntArrayFIFOQueue();
    visited[s] = true;
    frontier.enqueue(s);
    while (!frontier.isEmpty()) {
        int current = frontier.dequeueInt();
        IntArrayList neighbours = graph.adj(current);
        for (int idx = 0; idx < neighbours.size(); idx++) {
            int next = neighbours.getInt(idx);
            if (visited[next]) {
                continue;
            }
            // Mark on discovery so a vertex is enqueued at most once.
            visited[next] = true;
            edgeTo[next] = current;
            frontier.enqueue(next);
        }
    }
}
/**
 * Replaces the contents of {@code data} with a triangulated version.
 * {@code data} is read face by face: each face consumes {@code face.length}
 * consecutive entries. The first three entries of a face are emitted as one
 * triangle; if the face has more than three vertices, a second triangle made
 * of the face's entries 0, 2 and 3 is emitted as well.
 *
 * @param data    values to rewrite in place
 * @param idsVert one array per face; only its length is used here
 */
public static void triangulateSingleNum(IntList data, List<int[]> idsVert) {
    IntList triangulated = new IntArrayList(idsVert.size());
    int offset = 0;
    for (int[] face : idsVert) {
        int first = data.getInt(offset);
        int second = data.getInt(offset + 1);
        int third = data.getInt(offset + 2);
        triangulated.add(first);
        triangulated.add(second);
        triangulated.add(third);
        if (face.length > 3) {
            // Second triangle shares the diagonal (first, third).
            int fourth = data.getInt(offset + 3);
            triangulated.add(first);
            triangulated.add(third);
            triangulated.add(fourth);
        }
        offset += face.length;
    }
    data.clear();
    data.addAll(triangulated);
}
public void contaminationDownsampling(final Map<String, Double> perSampleDownsamplingFraction) { final int sampleCount = samples.sampleCount(); final IntArrayList readsToRemove = new IntArrayList(10); // blind estimate, can be improved? final int alleleCount = alleles.alleleCount(); for (int s = 0; s < sampleCount; s++) { final String sample = samples.sampleAt(s); final Double fractionDouble = perSampleDownsamplingFraction.get(sample); if (fractionDouble == null) continue; final double fraction = fractionDouble; if (Double.isNaN(fraction) || fraction <= 0.0) continue; if (fraction >= 1.0) { final int sampleReadCount = readsBySampleIndex[s].length; readsToRemove.ensureCapacity(sampleReadCount); for (int r = 0; r < sampleReadCount; r++) readsToRemove.add(r); removeSampleReads(s, readsToRemove, alleleCount); readsToRemove.clear(); } else { final Map<A, List<GATKSAMRecord>> readsByBestAllelesMap = readsByBestAlleleMap(s); removeSampleReads(s, AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(readsByBestAllelesMap, fraction), alleleCount); } } }
/**
 * Joins the given integers into one string, placing {@code separator}
 * between consecutive values.
 *
 * @param integers  values to join; {@code null} yields the empty string
 * @param separator text placed between values
 * @return the joined text, without a trailing separator
 */
public static String concat(IntArrayList integers, String separator) {
    if (integers == null) {
        return "";
    }
    StringBuilder joined = new StringBuilder();
    for (int value : integers) {
        joined.append(value).append(separator);
    }
    // Each value was followed by a separator; trim the last one.
    final int separatorLength = separator.length();
    if (joined.length() > separatorLength) {
        joined.setLength(joined.length() - separatorLength);
    }
    return joined.toString();
}
/**
 * Enumerates orderings of list indices for every subset size from 2 up to
 * the number of lists. For each size {@code k}, the index combinations
 * produced by {@code getCombinations(k, lists.size())} are each expanded
 * into their permutations via {@code permute}.
 * <p>
 * Example carried over from the original documentation: for three lists the
 * result is
 * [[0, 1], [1, 0], [0, 2], [2, 0], [1, 2], [2, 1],
 *  [0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 1, 0], [2, 0, 1]]
 * (the exact order depends on {@code getCombinations}).
 * <p>
 * Only the number of lists is used; their contents are never read.
 *
 * @param lists list of lists
 * @return the index orderings, grouped by increasing subset size
 */
public static <T> ObjectArrayList<IntArrayList> getListsCombinationIndices(ObjectArrayList<ObjectArrayList<T>> lists) {
    ObjectArrayList<IntArrayList> allOrderings = new ObjectArrayList<>();
    for (int subsetSize = 2; subsetSize <= lists.size(); subsetSize++) {
        ObjectArrayList<IntArrayList> orderingsForSize = new ObjectArrayList<>();
        for (int[] combination : getCombinations(subsetSize, lists.size())) {
            IntArrayList indices = new IntArrayList();
            for (int index : combination) {
                indices.add(index);
            }
            permute(indices, 0, orderingsForSize);
        }
        allOrderings.addAll(orderingsForSize);
    }
    return allOrderings;
}
private void output() throws CouldNotReceiveResultException, ColumnNameMismatchException { // Read the discovered INDs from the attributes Int2ObjectOpenHashMap<IntList> dep2ref = new Int2ObjectOpenHashMap<IntList>(this.numColumns); for (Attribute spiderAttribute : this.attributeId2attributeObject.values()) if (!spiderAttribute.getReferenced().isEmpty()) dep2ref.put(spiderAttribute.getAttributeId(), new IntArrayList(spiderAttribute.getReferenced())); // Write the result to the resultReceiver for (int dep : dep2ref.keySet()) { String depTableName = this.getTableNameFor(dep, this.tableColumnStartIndexes); String depColumnName = this.columnNames.get(dep); for (int ref : dep2ref.get(dep)) { String refTableName = this.getTableNameFor(ref, this.tableColumnStartIndexes); String refColumnName = this.columnNames.get(ref); this.resultReceiver.receiveResult(new InclusionDependency(new ColumnPermutation(new ColumnIdentifier(depTableName, depColumnName)), new ColumnPermutation(new ColumnIdentifier(refTableName, refColumnName)))); this.numUnaryINDs++; } } }
/**
 * For each sample, lists the indices of its reads for which
 * {@code unclippedReadOverlapsRegion} reports an overlap with the given location.
 *
 * @param overlap region to test against; {@code null} yields {@code null}
 * @return one index array per sample (in sample-index order), or {@code null}
 *         when {@code overlap} is {@code null}
 */
private int[][] overlappingReadIndicesBySampleIndex(final GenomeLoc overlap) {
    if (overlap == null) {
        return null;
    }
    final int sampleCount = samples.sampleCount();
    final int[][] indicesBySample = new int[sampleCount][];
    // Scratch list reused for every sample.
    final IntArrayList hits = new IntArrayList(200);
    final int contigIndex = overlap.getContigIndex();
    final int regionStart = overlap.getStart();
    final int regionStop = overlap.getStop();

    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++) {
        hits.clear();
        final GATKSAMRecord[] reads = readsBySampleIndex[sampleIndex];
        final int readCount = reads.length;
        hits.ensureCapacity(readCount);
        for (int readIndex = 0; readIndex < readCount; readIndex++) {
            if (unclippedReadOverlapsRegion(reads[readIndex], contigIndex, regionStart, regionStop)) {
                hits.add(readIndex);
            }
        }
        indicesBySample[sampleIndex] = hits.toIntArray();
    }
    return indicesBySample;
}
/**
 * Builds one {@code PositionListIndex} per column from the value-to-cluster maps.
 * Clusters with a single record are dropped. When {@code isNullEqualNull} is
 * {@code false}, the entry for the {@code null} key is removed from each
 * input map (the input maps are modified).
 *
 * @param clusterMaps     one value-to-record-ids map per column, in column order
 * @param isNullEqualNull whether records with a {@code null} value form a cluster
 * @return the position list indexes, in column order
 */
protected List<PositionListIndex> fetchPositionListIndexes(List<HashMap<String, IntArrayList>> clusterMaps, boolean isNullEqualNull) {
    List<PositionListIndex> plis = new ArrayList<>();
    int columnId = 0;
    for (HashMap<String, IntArrayList> valueToCluster : clusterMaps) {
        if (!isNullEqualNull) {
            valueToCluster.remove(null);
        }
        List<IntArrayList> nonTrivialClusters = new ArrayList<>();
        for (IntArrayList cluster : valueToCluster.values()) {
            if (cluster.size() <= 1) {
                continue;
            }
            nonTrivialClusters.add(cluster);
        }
        plis.add(new PositionListIndex(columnId, nonTrivialClusters));
        columnId++;
    }
    return plis;
}
/**
 * Static counterpart that turns per-column value-to-cluster maps into
 * {@code PositionListIndex} objects. Single-record clusters are skipped.
 * When {@code isNullEqualNull} is {@code false}, the {@code null} key is
 * removed from each input map (the input maps are modified).
 *
 * @param clusterMaps     one value-to-record-ids map per column, in column order
 * @param isNullEqualNull whether records with a {@code null} value form a cluster
 * @return the position list indexes, in column order
 */
protected static List<PositionListIndex> fetchPositionListIndexesStatic(List<HashMap<String, IntArrayList>> clusterMaps, boolean isNullEqualNull) {
    List<PositionListIndex> plis = new ArrayList<>();
    int columnId = 0;
    for (HashMap<String, IntArrayList> valueToCluster : clusterMaps) {
        if (!isNullEqualNull) {
            valueToCluster.remove(null);
        }
        List<IntArrayList> keptClusters = new ArrayList<>();
        for (IntArrayList cluster : valueToCluster.values()) {
            if (cluster.size() <= 1) {
                continue;
            }
            keptClusters.add(cluster);
        }
        plis.add(new PositionListIndex(columnId, keptClusters));
        columnId++;
    }
    return plis;
}
/**
 * Checks whether, within every cluster of this index, all records yield
 * distinct cluster identifiers over {@code otherAttrs}. Records for which
 * {@code buildClusterIdentifier} returns {@code null} are ignored.
 * On the first collision the colliding record pair is appended to
 * {@code comparisonSuggestions} and {@code false} is returned.
 *
 * @param compressedRecords     records indexed by record id
 * @param otherAttrs            attributes used to build the identifier
 * @param comparisonSuggestions receives the first colliding pair, if any
 * @return {@code true} if no two records of a cluster share an identifier
 */
public boolean isUniqueWith(int[][] compressedRecords, OpenBitSet otherAttrs, List<IntegerPair> comparisonSuggestions) {
    final int otherAttrCount = (int) otherAttrs.cardinality();
    for (IntArrayList cluster : this.clusters) {
        Object2IntOpenHashMap<ClusterIdentifier> firstRecordByIdentifier =
                new Object2IntOpenHashMap<>(cluster.size());
        for (int recordId : cluster) {
            ClusterIdentifier identifier =
                    this.buildClusterIdentifier(otherAttrs, otherAttrCount, compressedRecords[recordId]);
            if (identifier == null) {
                continue;
            }
            if (!firstRecordByIdentifier.containsKey(identifier)) {
                firstRecordByIdentifier.put(identifier, recordId);
                continue;
            }
            // Same identifier seen before in this cluster: not unique.
            comparisonSuggestions.add(new IntegerPair(recordId, firstRecordByIdentifier.getInt(identifier)));
            return false;
        }
    }
    return true;
}
/**
 * Creates an empty clause: type {@code UNKNOWN}, the subject, verb and
 * complement slots set to -1, all index and object lists empty, and no
 * parent clause.
 */
Clause() {
    // Classification and hierarchy.
    this.type = Type.UNKNOWN;
    this.parentClause = null;
    this.relativeAdverbial = false;

    // Single-valued slots start at -1 (presumably "not assigned" - confirm against callers).
    this.subject = -1;
    this.verb = -1;
    this.complement = -1;

    // Multi-valued slots.
    this.dobjects = new IntArrayList();
    this.iobjects = new IntArrayList();
    this.xcomps = new IntArrayList();
    this.ccomps = new IntArrayList();
    this.acomps = new IntArrayList();
    this.adverbials = new IntArrayList();

    // Containers.
    this.constituents = new ObjectArrayList<>();
    this.include = new BooleanArrayList();
    this.propositions = new ObjectArrayList<>();
}
private void fetchNonFdsWindowingOverClusters(Set<OpenBitSet> negCover, int[][] compressedRecords, List<PositionListIndex> plis) { System.out.println("\tMoving window over small clusters ..."); for (PositionListIndex pli : plis) { boolean selectSmallClustersOnly = pli.getClusters().size() < this.attributeThreshold; // If there are too few clusters, then the clusters are large and we have already executed sufficient comparisons between the records of these clusters for (IntArrayList cluster : pli.getClusters()) { if (selectSmallClustersOnly && (cluster.size() > this.windowSize)) // But if the current cluster is very small, we should still use it for comparisons (the other cluster(s) must be very large) continue; for (int recordIndex = 0; recordIndex < cluster.size(); recordIndex++) { int recordId = cluster.getInt(recordIndex); for (int partnerRecordIndex = recordIndex + 1; partnerRecordIndex < Math.min(recordIndex + this.windowSize, cluster.size()); partnerRecordIndex++) { int partnerRecordId = cluster.getInt(partnerRecordIndex); negCover.add(this.getViolatedFds(compressedRecords[recordId], compressedRecords[partnerRecordId])); } } } } }
/**
 * Compares records from the start of each cluster with records from its end.
 * For every cluster with at least {@code windowSize} records, each of the
 * first {@code windowSize} records is paired with the trailing records at
 * positions {@code size-1} down to {@code size-windowSize+1}. Pairs with
 * identical record ids are skipped. The result of {@code getViolatedFds}
 * for each pair is added to {@code negCover}.
 *
 * @param negCover          receives the violated-FD bit sets
 * @param compressedRecords records indexed by record id
 * @param plis              position list indexes to scan
 */
private void fetchNonFdsFromClustersTopsAndBottoms(Set<OpenBitSet> negCover, int[][] compressedRecords, List<PositionListIndex> plis) {
    System.out.println("\tComparing window on clusters tops and bottoms ...");
    for (PositionListIndex pli : plis) {
        for (IntArrayList cluster : pli.getClusters()) {
            final int clusterSize = cluster.size();
            if (clusterSize < this.windowSize) {
                continue;
            }
            // Bottom positions visited are strictly greater than this bound.
            final int bottomBound = clusterSize - this.windowSize;
            for (int top = 0; top < this.windowSize; top++) {
                int topRecordId = cluster.getInt(top);
                for (int bottom = clusterSize - 1; bottom > bottomBound; bottom--) {
                    int bottomRecordId = cluster.getInt(bottom);
                    if (topRecordId != bottomRecordId) {
                        negCover.add(this.getViolatedFds(compressedRecords[topRecordId], compressedRecords[bottomRecordId]));
                    }
                }
            }
        }
    }
}
/** * Given a list of integers 'intList', make all the permutations of the elements in the list. The result is stored * in a list of list of integers 'result'. The parameter 'k' should always be set to 0 (it is used for the purposes of * the recursion). * For example, if intList = [0, 1, 2], then the result would be: * result = [[0, 1, 2], [0, 2, 1], [1, 0, 2], [1, 2, 0], [2, 1, 0], [2, 0, 1]] * @param intList: list of integers * @param k: * @param result: permutations of the integer list (list of lists) */ private static void permute(IntArrayList intList, int k, ObjectArrayList<IntArrayList> result){ // Avoid waaay to many permutations if (k > 1000) { return; } for(int i = k; i < intList.size(); i++){ java.util.Collections.swap(intList, i, k); permute(intList, k + 1, result); java.util.Collections.swap(intList, k, i); } if (k == intList.size() -1){ result.add(intList.clone()); } }
public boolean refines(int[][] lhsInvertedPlis, int[] rhs) { for (IntArrayList cluster : this.clusters) { Object2IntOpenHashMap<IntArrayList> clustersMap = new Object2IntOpenHashMap<>(cluster.size()); // Check if all subclusters of this cluster point into the same other clusters for (int recordId : cluster) { IntArrayList additionalLhsCluster = this.buildClusterIdentifier(recordId, lhsInvertedPlis); if (additionalLhsCluster == null) continue; if (clustersMap.containsKey(additionalLhsCluster)) { if ((rhs[recordId] == -1) || (clustersMap.getInt(additionalLhsCluster) != rhs[recordId])) return false; } else { clustersMap.put(additionalLhsCluster, rhs[recordId]); } } } return true; }
/**
 * Depth-first search from {@code v} that looks for a directed cycle.
 * Marks {@code v} as visited and as being on the current recursion path.
 * When an edge leads to a vertex that is still on the path, {@code hasCycle}
 * is set and the cycle is pushed onto {@code cycle}: first the target vertex,
 * then the path from {@code v} back to it via {@code edgeTo}, then the
 * target vertex again. Once a cycle is known the search unwinds without
 * clearing {@code onStack}.
 *
 * @param graph directed graph being searched
 * @param v     vertex to expand
 */
private void dfs(DirectedGraph graph, int v) {
    visited[v] = true;
    onStack[v] = true;
    IntArrayList successors = graph.adj(v);
    for (int i = 0; i < successors.size(); i++) {
        int w = successors.getInt(i);
        if (hasCycle) {
            return;
        }
        if (visited[w]) {
            if (onStack[w]) {
                // Back edge v -> w closes a cycle.
                hasCycle = true;
                cycle.push(w);
                int r = v;
                while (r != w) {
                    cycle.push(r);
                    r = edgeTo[r];
                }
                cycle.push(w);
                return;
            }
            continue;
        }
        edgeTo[w] = v;
        dfs(graph, w);
    }
    onStack[v] = false;
}
private List<Score> scoreAllAsOneColumn(RankedItem<T> listItem, final InputCommand inputCommand) { IntArrayList indexesOfColumnBreaks = new IntArrayList(); StringBuilder allColumnText = new StringBuilder(); buildAllColumnTextAndIndexes(listItem, indexesOfColumnBreaks, allColumnText); Score allColumnScore = rankingStrategy.apply( inputCommand.getColumnFilterOptions(0), allColumnText.toString()); if (allColumnScore.rank > 0) { return convertScoreToMatchesPerColumn(allColumnText.toString(), allColumnScore, indexesOfColumnBreaks); } else { // There was no match. Add the empty to score to all columns List<Score> scores = new ArrayList<>(); for (FieldResolver<T> field : fields) { scores.add(allColumnScore); } return scores; } }
public Score scoreMultipleContiguousSequencesAnyOrder(final String[] words, final StringCursorPrimitive target) { int totalRank = 0; IntArrayList matches = new IntArrayList(); for (String word : words) { StringCursor targetCursor = new StringCursor(target).maskRegions(matches); Score score = scoreAsContiguousSequence(new StringCursorPrimitive(word), targetCursor.getCursorPrimitive()); if ( score.rank <= 0) { totalRank = 0; break; // all words must be found } totalRank += score.rank; matches.addAll(score.matches); } matches.sort(Comparator.naturalOrder()); return new Score(totalRank, matches); }
/** * Given a sequence of words and a pivot-word index, return the chained nouns from the left and from the right * of the pivot word. * @param sequence: a sequence of words (list of IndexedWord) * @param wordInd: the index of the pivot word * @return a list of chained nouns to the left and the right of the pivot word (the pivot word is included) */ public static ObjectArrayList<IndexedWord> getChainedNouns(ObjectArrayList<IndexedWord> sequence, int wordInd){ IntArrayList chainedNounsInd = new IntArrayList(); // Get the chained nouns from left and right IntArrayList chainedNounsLeft = getChainedNounsFromLeft(sequence, chainedNounsInd.clone(), wordInd); IntArrayList chainedNounsRight = getChainedNounsFromRight(sequence, chainedNounsInd.clone(), wordInd); // Add all the words to the chained nouns chainedNounsInd.addAll(chainedNounsLeft); chainedNounsInd.add(wordInd); chainedNounsInd.addAll(chainedNounsRight); // IndexedWord chained nouns ObjectArrayList<IndexedWord> iChainedNouns = new ObjectArrayList<IndexedWord>(); for (int i: FastUtil.sort(chainedNounsInd)){ iChainedNouns.add(sequence.get(i)); } return iChainedNouns; }
/** * Given a sequence of words and a pivot-word index, return the chained verbs from the left and from the right * of the pivot word. * @param sequence: a sequence of words (list of IndexedWord) * @param wordInd: the index of the pivot word * @return a list of chained verbs to the left and the right of the pivot word (the pivot word is included) */ public static ObjectArrayList<IndexedWord> getChainedVerbs(ObjectArrayList<IndexedWord> sequence, int wordInd){ IntArrayList chainedVerbsInd = new IntArrayList(); // Get the chained verbs from left and right IntArrayList chainedVerbsLeft = getChainedVerbsFromLeft(sequence, chainedVerbsInd.clone(), wordInd); IntArrayList chainedVerbsRight = getChainedVerbsFromRight(sequence, chainedVerbsInd.clone(), wordInd); // Add all the words to the chained verbs chainedVerbsInd.addAll(chainedVerbsLeft); chainedVerbsInd.add(wordInd); chainedVerbsInd.addAll(chainedVerbsRight); // IndexedWord chained verbs ObjectArrayList<IndexedWord> iChainedVerbs = new ObjectArrayList<IndexedWord>(); for (int i: FastUtil.sort(chainedVerbsInd)){ iChainedVerbs.add(sequence.get(i)); } return iChainedVerbs; }
/** * Given a sequence of words and a pivot-word index, return the "chained words" from the left and from the right * of the pivot word. "Chained words" are a list of words, which all of them share the same POS tag and have no * NE types. * * @param sequence: a sequence of words (list of IndexedWord) * @param wordInd: the index of the pivot word * @return a list of chained words to the left and the right of the pivot word (the pivot word is included) */ public static ObjectArrayList<IndexedWord> getChainedTagNoNER(ObjectArrayList<IndexedWord> sequence, int wordInd){ IntArrayList chainedPosWordsInd = new IntArrayList(); // Get the chained nouns from left and right IntArrayList chainedPosWordsLeft = getChainedTagsFromLeftNoNER(sequence, chainedPosWordsInd.clone(), wordInd); IntArrayList chainedPosWordsRight = getChainedTagsFromRightNoNER(sequence, chainedPosWordsInd.clone(), wordInd); // Add all the words to the chained nouns chainedPosWordsInd.addAll(chainedPosWordsLeft); chainedPosWordsInd.add(wordInd); chainedPosWordsInd.addAll(chainedPosWordsRight); // IndexedWord chained nouns ObjectArrayList<IndexedWord> iChainedNouns = new ObjectArrayList<IndexedWord>(); for (int i: FastUtil.sort(chainedPosWordsInd)){ iChainedNouns.add(sequence.get(i)); } return iChainedNouns; }
/** * Given a sequence of words and a pivot-word index, return the chained words of same NER, both from the left and * from the right of the pivot word (it is assumed that the pivot word is also NER). * @param sequence: a sequence of words (list of IndexedWord) * @param wordInd: the index of the pivot word * @return a list of chained nouns to the left and the right of the pivot word (the pivot word is included) */ public static ObjectArrayList<IndexedWord> getChainedNERs(ObjectArrayList<IndexedWord> sequence, int wordInd){ IntArrayList chainedNounsInd = new IntArrayList(); // Get the chained nouns from left and right IntArrayList chainedNounsLeft = getChainedNERsFromLeft(sequence, chainedNounsInd.clone(), wordInd, sequence.get(wordInd).ner()); IntArrayList chainedNounsRight = getChainedNERsFromRight(sequence, chainedNounsInd.clone(), wordInd, sequence.get(wordInd).ner()); // Add all the words to the chained nouns chainedNounsInd.addAll(chainedNounsLeft); chainedNounsInd.add(wordInd); chainedNounsInd.addAll(chainedNounsRight); // IndexedWord chained nouns ObjectArrayList<IndexedWord> iChainedNouns = new ObjectArrayList<IndexedWord>(); for (int i: FastUtil.sort(chainedNounsInd)){ iChainedNouns.add(sequence.get(i)); } return iChainedNouns; }
/**
 * Returns the path between the source vertex and {@code w} as a stack.
 * Vertices are pushed starting at {@code w} and following {@code edgeTo}
 * back to the source, so the source vertex is pushed last.
 *
 * @param w destination vertex
 * @return the path, or an empty stack if {@code isConnectedTo(w)} is false
 */
public IntStack pathTo(int w) {
    if (!isConnectedTo(w)) {
        return new IntArrayList(0);
    }
    IntStack path = new IntArrayList();
    int current = w;
    while (current != sourceVertex) {
        path.push(current);
        current = edgeTo[current];
    }
    path.push(sourceVertex);
    return path;
}
/**
 * Drops entries whose absolute value does not exceed {@code x}, but only if
 * that makes the row sparse. When fewer than half of the entries survive, a
 * new {@code SparseDoubleSortedVector} holding the surviving indices and
 * values is returned with this vector's row id, matrix id and clock.
 * Otherwise this vector is returned unchanged.
 *
 * @param x threshold compared against the absolute value of each entry
 * @return a sparse copy of the surviving entries, or {@code this}
 */
@Override
public TIntDoubleVector filter(double x) {
    IntArrayList keptIndices = new IntArrayList();
    int keptCount = 0;
    for (int i = 0; i < values.length; i++) {
        if (Math.abs(values[i]) > x) {
            keptIndices.add(i);
            keptCount++;
        }
    }

    if (keptCount >= values.length * 0.5) {
        // Still dense enough: keep the current representation.
        return this;
    }

    LOG.debug(String.format("Dense Row filter generate a sparse row with nonzero %d", keptCount));
    int[] sparseIndex = new int[keptCount];
    // elements() exposes the backing array, which may be longer than keptCount.
    System.arraycopy(keptIndices.elements(), 0, sparseIndex, 0, keptCount);
    double[] sparseValue = new double[keptCount];
    for (int i = 0; i < keptCount; i++) {
        sparseValue[i] = values[sparseIndex[i]];
    }

    SparseDoubleSortedVector sparse = new SparseDoubleSortedVector(dim, sparseIndex, sparseValue);
    sparse.setRowId(rowId).setMatrixId(matrixId).setClock(clock);
    return sparse;
}
/**
 * Creates a statistics holder for the named algorithm. Every counter list
 * starts out empty.
 *
 * @param algorithmName name stored with these statistics
 */
public Statistics(final String algorithmName) {
    this.algorithmName = algorithmName;

    // Pruning-rule application counters.
    this.numApplicationsUniquenessPruning = new IntArrayList();
    this.numApplicationsSwapPruning = new IntArrayList();
    this.numApplicationsValidRhsPruning = new IntArrayList();
    this.numApplicationsMergePruning = new IntArrayList();

    // Minimality and equivalence pruning, left- and right-hand side.
    this.numApplicationsMinimalityLhsPruning = new IntArrayList();
    this.numApplicationsMinimalityRhsPruning = new IntArrayList();
    this.numApplicationsEquivalenceLhsPruning = new IntArrayList();
    this.numApplicationsEquivalenceRhsPruning = new IntArrayList();

    // Partition combination counters.
    this.numPartitionCombinationsBySize = new IntArrayList();
}
/**
 * Restricts the referenced lists of a group of attribute combinations to
 * members of that same group. The group is resolved from the given indexes
 * into {@code attributeCombinations}; for each member that is a key of
 * {@code naryDep2ref}, its value list retains only group members.
 *
 * @param naryDep2ref                     dependent-to-referenced map, modified in place
 * @param attributeCombinationGroupIndexes indexes of the group members
 * @param attributeCombinations           lookup list for the indexes
 */
private void prune(Map<AttributeCombination, List<AttributeCombination>> naryDep2ref, IntArrayList attributeCombinationGroupIndexes, List<AttributeCombination> attributeCombinations) {
    List<AttributeCombination> group =
            new ArrayList<AttributeCombination>(attributeCombinationGroupIndexes.size());
    for (int index : attributeCombinationGroupIndexes) {
        group.add(attributeCombinations.get(index));
    }

    for (AttributeCombination member : group) {
        if (!naryDep2ref.containsKey(member)) {
            continue;
        }
        naryDep2ref.get(member).retainAll(group);
    }
}
/**
 * Lists the indices of all set bits of {@code set} in ascending order.
 *
 * @param set bit set to read
 * @return the set-bit indices; empty if no bit is set
 */
public static IntList convertToIntList(OpenBitSet set) {
    IntList setBits = new IntArrayList();
    // nextSetBit signals "no further bit" with -1.
    for (int bit = set.nextSetBit(0); bit != -1; bit = set.nextSetBit(bit + 1)) {
        setBits.add(bit);
    }
    return setBits;
}
private void doRecusiveCrap(int currentAttribute, IntList currentOrdering, List<DifferenceSet> setsNotCovered, IntList currentPath, List<DifferenceSet> originalDiffSet, List<FunctionalDependencyGroup2> result) throws CouldNotReceiveResultException, ColumnNameMismatchException { // Basic Case // FIXME if (!currentOrdering.isEmpty() && /* BUT */setsNotCovered.isEmpty()) { if (this.debugSysout) System.out.println("no FDs here"); return; } if (setsNotCovered.isEmpty()) { List<OpenBitSet> subSets = this.generateSubSets(currentPath); if (this.noOneCovers(subSets, originalDiffSet)) { FunctionalDependencyGroup2 fdg = new FunctionalDependencyGroup2(currentAttribute, currentPath); this.addFdToReceivers(fdg); result.add(fdg); } else { if (this.debugSysout) { System.out.println("FD not minimal"); System.out.println(new FunctionalDependencyGroup2(currentAttribute, currentPath)); } } return; } // Recusive Case for (int i = 0; i < currentOrdering.size(); i++) { List<DifferenceSet> next = this.generateNextNotCovered(currentOrdering.getInt(i), setsNotCovered); IntList nextOrdering = this.generateNextOrdering(next, currentOrdering, currentOrdering.getInt(i)); IntList currentPathCopy = new IntArrayList(currentPath); currentPathCopy.add(currentOrdering.getInt(i)); this.doRecusiveCrap(currentAttribute, nextOrdering, next, currentPathCopy, originalDiffSet, result); } }
/**
 * Extracts {@code index()} from every word, preserving list order.
 *
 * @param words list of indexed words
 * @return the words' indices, in the same order as {@code words}
 */
public static IntArrayList listOfWordsToIndexList(ObjectArrayList<IndexedWord> words) {
    IntArrayList wordIndices = new IntArrayList();
    for (int position = 0; position < words.size(); position++) {
        wordIndices.add(words.get(position).index());
    }
    return wordIndices;
}
public UndirectedGraph(int V) { this.V = V; adj = new IntArrayList[V]; for (int i = 0; i < V; i++) { adj[i] = new IntArrayList(); } }
/**
 * Runs one more sampling round with the window distance increased by one.
 * In every cluster, each record is matched against the record
 * {@code windowDistance} positions after it. Agree sets not yet in
 * {@code negCover} are cloned, added to {@code negCover} and
 * {@code newNonUCCs}, and reported to the memory guardian. Clusters no longer
 * than the window distance are removed from {@code clusters}. The number of
 * new entries and the number of comparisons of this round are appended to
 * {@code numNewNonFds} and {@code numComparisons}.
 *
 * @param newNonUCCs        receives the newly found agree sets
 * @param compressedRecords records indexed by record id
 */
public void runNext(UCCList newNonUCCs, int[][] compressedRecords) {
    this.windowDistance++;
    int comparisonsThisRound = 0;
    // Scratch bit set, overwritten by every match() call.
    OpenBitSet equalAttrs = new OpenBitSet(this.posCover.getNumAttributes());
    final int sizeBefore = newNonUCCs.size();

    for (Iterator<IntArrayList> it = this.clusters.iterator(); it.hasNext();) {
        IntArrayList cluster = it.next();

        if (cluster.size() <= this.windowDistance) {
            // Too short to ever yield a pair at this or any larger distance.
            it.remove();
            continue;
        }

        final int pairCount = cluster.size() - this.windowDistance;
        for (int recordIndex = 0; recordIndex < pairCount; recordIndex++) {
            int recordId = cluster.getInt(recordIndex);
            int partnerRecordId = cluster.getInt(recordIndex + this.windowDistance);

            this.sampler.match(equalAttrs, compressedRecords[recordId], compressedRecords[partnerRecordId]);

            if (!this.negCover.contains(equalAttrs)) {
                // Store a copy, because the scratch set is reused.
                OpenBitSet snapshot = equalAttrs.clone();
                this.negCover.add(snapshot);
                newNonUCCs.add(snapshot);

                this.memoryGuardian.memoryChanged(1);
                this.memoryGuardian.match(this.negCover, this.posCover, newNonUCCs);
            }
            comparisonsThisRound++;
        }
    }

    this.numNewNonFds.add(newNonUCCs.size() - sizeBefore);
    this.numComparisons.add(comparisonsThisRound);
}
/**
 * Reads the input and builds, per attribute, a map from value to the ids of
 * the records holding that value. Record ids are assigned in read order
 * starting at 0; {@code numRecords} is reset and counts the records read.
 * Reading stops at the end of the input or, when {@code inputRowLimit} is
 * positive, once that many records were read.
 *
 * @param relationalInput source of records
 * @param numAttributes   number of per-attribute maps to create
 * @return one value-to-record-ids map per attribute
 * @throws InputIterationException if reading the input fails
 * @throws RuntimeException        when the record count reaches {@code Integer.MAX_VALUE - 1}
 */
protected List<HashMap<String, IntArrayList>> calculateClusterMaps(RelationalInput relationalInput, int numAttributes) throws InputIterationException {
    List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
    for (int attribute = 0; attribute < numAttributes; attribute++) {
        clusterMaps.add(new HashMap<String, IntArrayList>());
    }

    this.numRecords = 0;
    while (relationalInput.hasNext()) {
        if (this.inputRowLimit > 0 && this.inputRowLimit == this.numRecords) {
            break;
        }
        List<String> record = relationalInput.next();

        int attributeId = 0;
        for (String value : record) {
            HashMap<String, IntArrayList> valueToCluster = clusterMaps.get(attributeId);
            IntArrayList cluster = valueToCluster.get(value);
            if (cluster == null) {
                // First occurrence of this value in this attribute.
                cluster = new IntArrayList();
                valueToCluster.put(value, cluster);
            }
            cluster.add(this.numRecords);
            attributeId++;
        }

        this.numRecords++;
        if (this.numRecords == Integer.MAX_VALUE - 1) {
            throw new RuntimeException("PLI encoding into integer based PLIs is not possible, because the number of records in the dataset exceeds Integer.MAX_VALUE. Use long based plis instead! (NumRecords = " + this.numRecords + " and Integer.MAX_VALUE = " + Integer.MAX_VALUE);
        }
    }
    return clusterMaps;
}
/**
 * Builds the position list indexes for the given records: first the
 * per-attribute value-to-record-ids maps, then one {@code PositionListIndex}
 * per attribute.
 * <p>
 * The original guarded against {@code records.size() > Integer.MAX_VALUE}.
 * {@code size()} returns an {@code int}, so that comparison can never be
 * true and the guard was unreachable; it has been removed. An in-memory list
 * cannot report more than {@code Integer.MAX_VALUE} records, so every record
 * id assigned here fits into an {@code int}.
 *
 * @param records         the records, one list of attribute values per record
 * @param numAttributes   number of attributes per record
 * @param isNullEqualNull whether records with a {@code null} value form a cluster
 * @return one position list index per attribute, in attribute order
 * @throws InputIterationException declared by {@code calculateClusterMapsStatic}
 */
public static List<PositionListIndex> getPLIs(ObjectArrayList<List<String>> records, int numAttributes, boolean isNullEqualNull) throws InputIterationException {
    List<HashMap<String, IntArrayList>> clusterMaps = calculateClusterMapsStatic(records, numAttributes);
    return fetchPositionListIndexesStatic(clusterMaps, isNullEqualNull);
}
/**
 * Builds, per attribute, a map from value to the ids of the records holding
 * that value. Record ids are the positions of the records in {@code records}.
 *
 * @param records       the records, one list of attribute values per record
 * @param numAttributes number of per-attribute maps to create
 * @return one value-to-record-ids map per attribute
 * @throws InputIterationException declared for parity with the input-based variant
 */
protected static List<HashMap<String, IntArrayList>> calculateClusterMapsStatic(ObjectArrayList<List<String>> records, int numAttributes) throws InputIterationException {
    List<HashMap<String, IntArrayList>> clusterMaps = new ArrayList<>();
    for (int attribute = 0; attribute < numAttributes; attribute++) {
        clusterMaps.add(new HashMap<String, IntArrayList>());
    }

    int recordId = 0;
    for (List<String> record : records) {
        int attributeId = 0;
        for (String value : record) {
            HashMap<String, IntArrayList> valueToCluster = clusterMaps.get(attributeId);
            IntArrayList cluster = valueToCluster.get(value);
            if (cluster == null) {
                // First occurrence of this value in this attribute.
                cluster = new IntArrayList();
                valueToCluster.put(value, cluster);
            }
            cluster.add(recordId);
            attributeId++;
        }
        recordId++;
    }
    return clusterMaps;
}
/**
 * Intersects this index with another PLI given in inverted form.
 * All clusters of the two-level map from {@code buildIntersectMap} that
 * contain more than one record are collected into a new index.
 *
 * @param otherPLI the other PLI, passed on to {@code buildIntersectMap}
 * @return a new {@code PositionListIndex} with attribute id -1
 */
public PositionListIndex intersect(int[] otherPLI) {
    Int2ObjectMap<Int2ObjectMap<IntArrayList>> intersection = this.buildIntersectMap(otherPLI);

    List<IntArrayList> nonTrivialClusters = new ArrayList<>();
    for (Int2ObjectMap<IntArrayList> inner : intersection.values()) {
        for (IntArrayList candidate : inner.values()) {
            if (candidate.size() <= 1) {
                continue;
            }
            nonTrivialClusters.add(candidate);
        }
    }
    return new PositionListIndex(-1, nonTrivialClusters);
}
protected boolean probe(int[][] compressedRecords, int rhsAttr, IntArrayList cluster) { int rhsClusterId = compressedRecords[cluster.getInt(0)][rhsAttr]; // If otherClusterId < 0, then this cluster must point into more than one other clusters if (rhsClusterId == -1) return false; // Check if all records of this cluster point into the same other cluster for (int recordId : cluster) if (compressedRecords[recordId][rhsAttr] != rhsClusterId) return false; return true; }
protected boolean probe(int[] rhsInvertedPli, IntArrayList cluster) { int rhsClusterId = rhsInvertedPli[cluster.getInt(0)]; // If otherClusterId < 0, then this cluster must point into more than one other clusters if (rhsClusterId == -1) return false; // Check if all records of this cluster point into the same other cluster for (int recordId : cluster) if (rhsInvertedPli[recordId] != rhsClusterId) return false; return true; }