@SuppressWarnings({ "rawtypes", "unchecked" }) public static Map invert(Map inputMap) throws InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, ClassNotFoundException { LOGGER.info("Inverting map..."); Map outputMap = (Map) invertMapType(inputMap.getClass()).getConstructor(new Class[] {}).newInstance(new Object[] {}); ProgressLogger pl = new ProgressLogger(LOGGER, "entries"); pl.expectedUpdates = inputMap.size(); pl.start(); for (Object entryObj : inputMap.entrySet()) { Map.Entry entry = (Map.Entry) entryObj; Object oldValue = outputMap.put(entry.getValue(), entry.getKey()); if (oldValue != null) throw new IllegalArgumentException( "The value " + entry.getValue() + " is associated to both '" + oldValue + "' and '" + entry.getKey() + "'. The map is not" + "bijective" ); pl.lightUpdate(); } pl.done(); return outputMap; }
public static void truncateMapByHighestValue(Int2DoubleMap map, int howMany) { if (howMany >= map.size()) return; double[] values = map.values().toDoubleArray(); Arrays.sort(values); // not best strategy, i know org.apache.commons.lang.ArrayUtils.reverse(values); double threshold = values[howMany+1]; ObjectIterator<Entry> mapIterator = map.int2DoubleEntrySet().iterator(); int inserted = 0; while (mapIterator.hasNext()) if (mapIterator.next().getDoubleValue() < threshold || inserted >= howMany) mapIterator.remove(); else inserted++; }
private final Long2DoubleOpenHashMap verifyCandidates(final Vector v, Long2DoubleOpenHashMap accumulator) { Long2DoubleOpenHashMap matches = new Long2DoubleOpenHashMap(); for (Long2DoubleMap.Entry e : accumulator.long2DoubleEntrySet()) { final long candidateID = e.getLongKey(); if (Double.compare(e.getDoubleValue() + ps.get(candidateID), theta) < 0) // A[y] = dot(x, y'') continue; // l2 pruning Vector residual = residuals.get(candidateID); assert (residual != null); final double ds1 = e.getDoubleValue() + Math.min(v.maxValue() * residual.sumValues(), residual.maxValue() * v.sumValues()); if (Double.compare(ds1, theta) < 0) continue; // dpscore, eq. (5) final double sz2 = e.getDoubleValue() + Math.min(v.maxValue() * residual.size(), residual.maxValue() * v.size()); if (Double.compare(sz2, theta) < 0) continue; // dpscore, eq. (9) final long deltaT = v.timestamp() - candidateID; double score = e.getDoubleValue() + Vector.similarity(v, residual); // dot(x, y) = A[y] + dot(x, y') score *= forgettingFactor(lambda, deltaT); // apply forgetting factor numSimilarities++; if (Double.compare(score, theta) >= 0) // final check matches.put(candidateID, score); } numMatches += matches.size(); return matches; }
private final Long2DoubleOpenHashMap verifyCandidates(final Vector v, Long2DoubleOpenHashMap accumulator) { Long2DoubleOpenHashMap matches = new Long2DoubleOpenHashMap(); for (Long2DoubleMap.Entry e : accumulator.long2DoubleEntrySet()) { final long candidateID = e.getLongKey(); if (Double.compare(e.getDoubleValue() + ps.get(candidateID), theta) < 0) // A[y] = dot(x, y'') continue; // l2 pruning final Vector residual = residuals.get(candidateID); assert (residual != null); final double ds1 = e.getDoubleValue() + Math.min(v.maxValue() * residual.sumValues(), residual.maxValue() * v.sumValues()); if (Double.compare(ds1, theta) < 0) continue; // dpscore, eq. (5) final double sz2 = e.getDoubleValue() + Math.min(v.maxValue() * residual.size(), residual.maxValue() * v.size()); if (Double.compare(sz2, theta) < 0) continue; // dpscore, eq. (9) final long deltaT = v.timestamp() - candidateID; double score = e.getDoubleValue() + Vector.similarity(v, residual); // dot(x, y) = A[y] + dot(x, y') score *= forgettingFactor(lambda, deltaT); // apply forgetting factor numSimilarities++; if (Double.compare(score, theta) >= 0) // final check matches.put(candidateID, score); } numMatches += matches.size(); return matches; }
private Long2DoubleOpenHashMap verifyCandidates(final Vector v, Long2DoubleOpenHashMap accumulator) { Long2DoubleOpenHashMap matches = new Long2DoubleOpenHashMap(); for (Long2DoubleMap.Entry e : accumulator.long2DoubleEntrySet()) { long candidateID = e.getLongKey(); Vector candidateResidual = residuals.get(candidateID); assert (candidateResidual != null); double score = e.getDoubleValue() + Vector.similarity(v, candidateResidual); // A[y] + dot(y',x) long deltaT = v.timestamp() - candidateID; score *= Commons.forgettingFactor(lambda, deltaT); // apply forgetting factor numSimilarities++; if (Double.compare(score, theta) >= 0) // final check matches.put(candidateID, score); } numMatches += matches.size(); return matches; }
private Vector addToIndex(final Vector v) { double pscore = 0; Vector residual = new Vector(v.timestamp()); for (Entry e : v.int2DoubleEntrySet()) { int dimension = e.getIntKey(); double weight = e.getDoubleValue(); pscore += weight * maxVector.get(dimension); if (Double.compare(pscore, theta) >= 0) { PostingList list; if ((list = idx.get(dimension)) == null) { list = new PostingList(); idx.put(dimension, list); } list.add(v.timestamp(), weight); size++; // v.remove(dimension); } else { residual.put(dimension, weight); } } return residual; }
@Test public void testIterationOrder() { Vector v = new Vector(); v.put(0, 0); v.put(1, 1); v.put(2, 2); v.put(3, 3); v.put(4, 4); BidirectionalIterator<Int2DoubleMap.Entry> it = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); // for (int i = 0; i < v.size(); i++) { int i = 4; while (it.hasPrevious()) { Entry e = it.previous(); int lastKey = e.getIntKey(); double lastValue = e.getDoubleValue(); assertEquals(i, lastKey); assertEquals(i, lastValue, Double.MIN_NORMAL); i--; } assertEquals(5, v.size()); assertEquals(0, v.int2DoubleEntrySet().first().getIntKey()); assertEquals(4, v.int2DoubleEntrySet().last().getIntKey()); }
@Test public void testNormalize() { Vector v = new Vector(); v.put(0, 0.5); v.put(1, 0.5); v.put(2, 0.5); v.put(3, 0.5); assertEquals(1, v.magnitude(), Double.MIN_NORMAL); Vector n = Vector.l2normalize(v); assertEquals(v, n); assertEquals(0.5, n.maxValue, Double.MIN_NORMAL); assertTrue(v == n); v = new Vector(); v.put(0, 0.5); v.put(1, 0.5); v.put(2, 0.5); n = Vector.l2normalize(v); assertEquals(1, n.magnitude(), Double.MIN_NORMAL); assertEquals(3, n.size()); Iterator<Entry> it = v.int2DoubleEntrySet().fastIterator(); for (Entry e : n.int2DoubleEntrySet()) { assertEquals(e.getIntKey(), it.next().getIntKey()); // same order } assertEquals(0.5 / v.magnitude(), n.maxValue(), Double.MIN_NORMAL); // max is updated }
@Override public Int2DoubleMap scores(int docI) { Int2DoubleMap originalScores = scorer.scores(docI); ObjectIterator<Entry> iterator = originalScores.int2DoubleEntrySet().iterator(); Int2DoubleMap newScores = new Int2DoubleOpenHashMap(originalScores.size()); for (Entry entry = iterator.next(); iterator.hasNext(); entry = iterator.next()) { newScores.put(entry.getIntKey(), (entry.getDoubleValue() - mean) / stddev ); } return newScores; }
private final Long2DoubleOpenHashMap generateCandidates(final Vector v, final boolean addToIndex) { final Long2DoubleOpenHashMap accumulator = new Long2DoubleOpenHashMap(size); for (Entry e : v.int2DoubleEntrySet()) { final int dimension = e.getIntKey(); final double queryWeight = e.getDoubleValue(); PostingList list; if ((list = idx.get(dimension)) != null) { for (PostingEntry pe : list) { numPostingEntries++; final long targetID = pe.getID(); final double targetWeight = pe.getWeight(); final double additionalSimilarity = queryWeight * targetWeight; accumulator.addTo(targetID, additionalSimilarity); } } if (addToIndex) { if (list == null) { list = new PostingList(); idx.put(dimension, list); } list.add(v.timestamp(), queryWeight); size++; } } numCandidates += accumulator.size(); return accumulator; }
private final Long2DoubleOpenHashMap verifyCandidates(final Vector v, Long2DoubleOpenHashMap accumulator) { numSimilarities = numCandidates; // add forgetting factor e^(-lambda*delta_T) and filter candidates < theta for (Iterator<Long2DoubleMap.Entry> it = accumulator.long2DoubleEntrySet().iterator(); it.hasNext();) { Long2DoubleMap.Entry e = it.next(); final long deltaT = v.timestamp() - e.getLongKey(); e.setValue(e.getDoubleValue() * forgettingFactor(lambda, deltaT)); if (Doubles.compare(e.getDoubleValue(), theta) < 0) it.remove(); } numMatches += accumulator.size(); return accumulator; }
private final Long2DoubleOpenHashMap generateCandidates(final Vector v) { final Long2DoubleOpenHashMap accumulator = new Long2DoubleOpenHashMap(); double l2remscore = 1, // rs4 rst = 1, squaredQueryPrefixMagnitude = 1; for (BidirectionalIterator<Entry> vecIter = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); vecIter .hasPrevious();) { // iterate over v in reverse order final Entry e = vecIter.previous(); final int dimension = e.getIntKey(); final double queryWeight = e.getDoubleValue(); // x_j final double rscore = l2remscore; squaredQueryPrefixMagnitude -= queryWeight * queryWeight; L2APPostingList list; if ((list = idx.get(dimension)) != null) { for (L2APPostingEntry pe : list) { numPostingEntries++; final long targetID = pe.id(); // y if (accumulator.containsKey(targetID) || Double.compare(rscore, theta) >= 0) { final double targetWeight = pe.weight(); // y_j final double additionalSimilarity = queryWeight * targetWeight; // x_j * y_j accumulator.addTo(targetID, additionalSimilarity); // A[y] += x_j * y_j final double l2bound = accumulator.get(targetID) + FastMath.sqrt(squaredQueryPrefixMagnitude) * pe.magnitude(); // A[y] + ||x'_j|| * ||y'_j|| if (Double.compare(l2bound, theta) < 0) accumulator.remove(targetID); // prune this candidate (early verification) } } rst -= queryWeight * queryWeight; // rs_t -= x_j^2 l2remscore = FastMath.sqrt(rst); // rs_4 = sqrt(rs_t) } } numCandidates += accumulator.size(); return accumulator; }
private final Vector addToIndex(final Vector v) { double bt = 0, b3 = 0, pscore = 0; boolean psSaved = false; Vector residual = new Vector(v.timestamp()); for (Entry e : v.int2DoubleEntrySet()) { int dimension = e.getIntKey(); double weight = e.getDoubleValue(); pscore = b3; bt += weight * weight; b3 = FastMath.sqrt(bt); if (Double.compare(b3, theta) >= 0) { if (!psSaved) { assert (!ps.containsKey(v.timestamp())); ps.put(v.timestamp(), pscore); psSaved = true; } L2APPostingList list; if ((list = idx.get(dimension)) == null) { list = new L2APPostingList(); idx.put(dimension, list); } list.add(v.timestamp(), weight, pscore); size++; } else { residual.put(dimension, weight); } } return residual; }
private final Long2DoubleOpenHashMap generateCandidates(final Vector v) { final Long2DoubleOpenHashMap accumulator = new Long2DoubleOpenHashMap(); double remscore = Vector.similarity(v, maxVectorInIndex); // rs3, enhanced remscore bound double l2remscore = 1, // rs4 rst = 1, squaredQueryPrefixMagnitude = 1; for (BidirectionalIterator<Entry> vecIter = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); vecIter .hasPrevious();) { // iterate over v in reverse order final Entry e = vecIter.previous(); final int dimension = e.getIntKey(); final double queryWeight = e.getDoubleValue(); // x_j final double rscore = Math.min(remscore, l2remscore); squaredQueryPrefixMagnitude -= queryWeight * queryWeight; L2APPostingList list; if ((list = idx.get(dimension)) != null) { for (L2APPostingEntry pe : list) { numPostingEntries++; final long targetID = pe.id(); // y if (accumulator.containsKey(targetID) || Double.compare(rscore, theta) >= 0) { final double targetWeight = pe.weight(); // y_j final double additionalSimilarity = queryWeight * targetWeight; // x_j * y_j accumulator.addTo(targetID, additionalSimilarity); // A[y] += x_j * y_j final double l2bound = accumulator.get(targetID) + FastMath.sqrt(squaredQueryPrefixMagnitude) * pe.magnitude(); // A[y] + ||x'_j|| * ||y'_j|| if (Double.compare(l2bound, theta) < 0) accumulator.remove(targetID); // prune this candidate (early verification) } } remscore -= queryWeight * maxVectorInIndex.get(dimension); // rs_3 -= x_j * \hat{c_w} rst -= queryWeight * queryWeight; // rs_t -= x_j^2 l2remscore = FastMath.sqrt(rst); // rs_4 = sqrt(rs_t) } } numCandidates += accumulator.size(); return accumulator; }
private final Vector addToIndex(final Vector v) { double b1 = 0, bt = 0, b3 = 0, pscore = 0; boolean psSaved = false; Vector residual = new Vector(v.timestamp()); for (Entry e : v.int2DoubleEntrySet()) { int dimension = e.getIntKey(); double weight = e.getDoubleValue(); pscore = Math.min(b1, b3); b1 += weight * maxVectorInWindow.get(dimension); bt += weight * weight; b3 = FastMath.sqrt(bt); if (Double.compare(Math.min(b1, b3), theta) >= 0) { if (!psSaved) { assert (!ps.containsKey(v.timestamp())); ps.put(v.timestamp(), pscore); psSaved = true; } L2APPostingList list; if ((list = idx.get(dimension)) == null) { list = new L2APPostingList(); idx.put(dimension, list); } list.add(v.timestamp(), weight, pscore); size++; } else { residual.put(dimension, weight); } } maxVectorInIndex.updateMaxByDimension(v); return residual; }
private Long2DoubleOpenHashMap generateCandidates(final Vector v) { Long2DoubleOpenHashMap accumulator = new Long2DoubleOpenHashMap(); double remscore = Vector.similarity(v, maxVector); /* candidate generation */ for (BidirectionalIterator<Entry> it = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); it .hasPrevious();) { // iterate over v in reverse order Entry e = it.previous(); int dimension = e.getIntKey(); double queryWeight = e.getDoubleValue(); // x_j PostingList list; if ((list = idx.get(dimension)) != null) { for (PostingEntry pe : list) { numPostingEntries++; final long targetID = pe.getID(); // y if (accumulator.containsKey(targetID) || Double.compare(remscore, theta) >= 0) { final double targetWeight = pe.getWeight(); // y_j final double additionalSimilarity = queryWeight * targetWeight; // x_j * y_j accumulator.addTo(targetID, additionalSimilarity); // A[y] += x_j * y_j } } } remscore -= queryWeight * maxVector.get(dimension); } numCandidates += accumulator.size(); return accumulator; }
private final Vector addToIndex(final Vector v) { double bt = 0, b3 = 0, pscore = 0; boolean psSaved = false; final Vector residual = new Vector(v.timestamp()); for (Entry e : v.int2DoubleEntrySet()) { final int dimension = e.getIntKey(); final double weight = e.getDoubleValue(); pscore = b3; bt += weight * weight; b3 = FastMath.sqrt(bt); if (Double.compare(b3, theta) >= 0) { // bound larger than threshold, start indexing if (!psSaved) { ps.put(v.timestamp(), pscore); psSaved = true; } StreamingL2APPostingList list; if ((list = idx.get(dimension)) == null) { list = new StreamingL2APPostingList(); idx.put(dimension, list); } list.add(v.timestamp(), weight, pscore); size++; } else { residual.put(dimension, weight); } } return residual; }
private final Vector addToIndex(final Vector v) { double b1 = 0, bt = 0, b3 = 0, pscore = 0; boolean psSaved = false; final Vector residual = new Vector(v.timestamp()); for (Entry e : v.int2DoubleEntrySet()) { final int dimension = e.getIntKey(); final double weight = e.getDoubleValue(); pscore = Math.min(b1, b3); b1 += weight * maxVector.get(dimension); bt += weight * weight; b3 = FastMath.sqrt(bt); if (Double.compare(Math.min(b1, b3), theta) >= 0) { // bound larger than threshold, start indexing if (!psSaved) { ps.put(v.timestamp(), pscore); psSaved = true; } StreamingL2APPostingList list; if ((list = idx.get(dimension)) == null) { list = new StreamingL2APPostingList(); idx.put(dimension, list); } list.add(v.timestamp(), weight, pscore); size++; } else { residual.put(dimension, weight); } } return residual; }
private final void generateCandidates(final Vector v) { double l2remscore = 1, // rs4 rst = 1, squaredQueryPrefixMagnitude = 1; for (BidirectionalIterator<Entry> vecIter = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); vecIter .hasPrevious();) { // iterate over v in reverse order final Entry e = vecIter.previous(); final int dimension = e.getIntKey(); final double queryWeight = e.getDoubleValue(); // x_j final double rscore = l2remscore; squaredQueryPrefixMagnitude -= queryWeight * queryWeight; StreamingL2APPostingList list; if ((list = idx.get(dimension)) != null) { for (StreamingL2APPostingListIterator listIter = list.reverseIterator(); listIter.hasPrevious();) { final L2APPostingEntry pe = listIter.previous(); final long targetID = pe.id(); // y // time filtering final long deltaT = v.timestamp() - targetID; if (Doubles.compare(deltaT, tau) > 0) { listIter.next(); // back off one position size -= listIter.nextIndex(); // update index size before cutting listIter.cutHead(); // prune the head break; } numPostingEntries++; final double ff = forgettingFactor(lambda, deltaT); // forgetting factor applied directly to the l2prefix bound if (accumulator.containsKey(targetID) || Double.compare(rscore * ff, theta) >= 0) { final double targetWeight = pe.weight(); // y_j final double additionalSimilarity = queryWeight * targetWeight; // x_j * y_j accumulator.addTo(targetID, additionalSimilarity); // A[y] += x_j * y_j final double l2bound = accumulator.get(targetID) + FastMath.sqrt(squaredQueryPrefixMagnitude) * pe.magnitude(); // A[y] + ||x'_j|| * ||y'_j|| // forgetting factor applied directly to the l2sum bound if (Double.compare(l2bound * ff, theta) < 0) { accumulator.remove(targetID); // prune this candidate (early verification) } } } rst -= queryWeight * queryWeight; // rs_t -= x_j^2 l2remscore = FastMath.sqrt(rst); // rs_4 = sqrt(rs_t) } } numCandidates += accumulator.size(); }
private void prunePS(long lowWatermark) { ObjectIterator<Long2DoubleMap.Entry> it = ps.long2DoubleEntrySet().fastIterator(); while (it.hasNext() && it.next().getLongKey() < lowWatermark) { it.remove(); } }
private final void generateCandidates(final Vector v) { double remscore = maxVectorInIndex.simimarityFF(v); // rs3, enhanced remscore bound with addded forgetting factor per dimension double l2remscore = 1, // rs4 rst = 1, squaredQueryPrefixMagnitude = 1; for (BidirectionalIterator<Entry> vecIter = v.int2DoubleEntrySet().fastIterator(v.int2DoubleEntrySet().last()); vecIter .hasPrevious();) { // iterate over v in reverse order final Entry e = vecIter.previous(); final int dimension = e.getIntKey(); final double queryWeight = e.getDoubleValue(); // x_j squaredQueryPrefixMagnitude -= queryWeight * queryWeight; StreamingL2APPostingList list; if ((list = idx.get(dimension)) != null) { for (StreamingL2APPostingListIterator listIter = list.iterator(); listIter.hasNext();) { final L2APPostingEntry pe = listIter.next(); final long targetID = pe.id(); // y // time filtering numPostingEntries++; final long deltaT = v.timestamp() - targetID; if (Doubles.compare(deltaT, tau) > 0) { listIter.remove(); size--; continue; } final double ff = forgettingFactor(lambda, deltaT); // forgetting factor applied directly to the l2prefix bounds final double rscore = Math.min(remscore, l2remscore * ff); if (accumulator.containsKey(targetID) || Double.compare(rscore, theta) >= 0) { final double targetWeight = pe.weight(); // y_j final double additionalSimilarity = queryWeight * targetWeight; // x_j * y_j accumulator.addTo(targetID, additionalSimilarity); // A[y] += x_j * y_j final double l2bound = accumulator.get(targetID) + FastMath.sqrt(squaredQueryPrefixMagnitude) * pe.magnitude(); // A[y] + ||x'_j|| * ||y'_j|| // forgetting factor applied directly to the l2bound if (Double.compare(l2bound * ff, theta) < 0) accumulator.remove(targetID); // prune this candidate (early verification) } } final double dimFF = maxVectorInIndex.dimensionFF(dimension, v.timestamp()); remscore -= queryWeight * maxVectorInIndex.get(dimension) * dimFF; // rs_3 -= x_j * \hat{c_w} rst -= queryWeight * queryWeight; // rs_t -= x_j^2 l2remscore = FastMath.sqrt(rst); // rs_4 = sqrt(rs_t) } } numCandidates += accumulator.size(); }