/** * Creates a new <code>TIntFloatHashMap</code> instance containing * all of the entries in the map passed in. * * @param map a <tt>TIntFloatMap</tt> that will be duplicated. */ public TIntFloatHashMap( TIntFloatMap map ) { super( map.size() ); if ( map instanceof TIntFloatHashMap ) { TIntFloatHashMap hashmap = ( TIntFloatHashMap ) map; this._loadFactor = hashmap._loadFactor; this.no_entry_key = hashmap.no_entry_key; this.no_entry_value = hashmap.no_entry_value; //noinspection RedundantCast if ( this.no_entry_key != ( int ) 0 ) { Arrays.fill( _set, this.no_entry_key ); } //noinspection RedundantCast if ( this.no_entry_value != ( float ) 0 ) { Arrays.fill( _values, this.no_entry_value ); } setUp( (int) Math.ceil( DEFAULT_CAPACITY / _loadFactor ) ); } putAll( map ); }
/**
 * Computes the cosine similarity of two sparse vectors.
 *
 * @param X the first vector, keyed by int id
 * @param Y the second vector, keyed by int id
 * @return the dot product of X and Y divided by the square root of the
 *         product of their squared lengths, or 0.0 when that product is zero
 */
public static double cosineSimilarity(TIntFloatMap X, TIntFloatMap Y) {
    double squaredLengthX = 0.0;
    double squaredLengthY = 0.0;
    double dotProduct = 0.0;

    // One pass over X accumulates both |X|^2 and the shared-key dot product.
    for (int id : X.keys()) {
        final double xValue = X.get(id);
        squaredLengthX += xValue * xValue;
        if (Y.containsKey(id)) {
            dotProduct += xValue * Y.get(id);
        }
    }

    for (double yValue : Y.values()) {
        squaredLengthY += yValue * yValue;
    }

    final double squaredLengthProduct = squaredLengthX * squaredLengthY;
    if (squaredLengthProduct == 0) {
        // At least one vector has zero length: similarity is defined as 0.
        return 0.0;
    }
    return dotProduct / Math.sqrt(squaredLengthProduct);
}
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param X the vector to normalize
 * @return a new map with every value of X divided by the Euclidean norm of
 *         X; if the sum of squared values is zero, X itself is returned
 *         unchanged (the same instance, not a copy)
 */
public static TIntFloatMap normalizeVector(TIntFloatMap X) {
    double squaredLength = 0.0;
    for (double component : X.values()) {
        squaredLength += component * component;
    }

    if (squaredLength == 0.0) {
        // Nothing to scale by; hand the input straight back.
        return X;
    }

    final double length = Math.sqrt(squaredLength);
    final TIntFloatHashMap unit = new TIntFloatHashMap();
    for (int id : X.keys()) {
        unit.put(id, (float) (X.get(id) / length));
    }
    return unit;
}
/**
 * Measures the overlap between the key sets of two vectors as
 * (number of shared keys) / (number of distinct keys in either vector).
 * The stored values are ignored.
 *
 * @param v1     the first vector
 * @param v2     the second vector
 * @param sorted unused by this measure
 * @return the overlap ratio, or 0.0 when both vectors are empty (the ratio
 *         would otherwise be 0/0, i.e. NaN)
 */
public double compare(
        TIntFloatMap v1,
        TIntFloatMap v2,
        boolean sorted) {
    int overlaps = 0;

    TIntIterator iter = v1.keySet().iterator();
    while (iter.hasNext()) {
        int key = iter.next();
        if (v2.containsKey(key)) {
            overlaps++;
        }
    }

    // Size of the union of both key sets.
    int union = v1.size() + v2.size() - overlaps;
    if (union == 0) {
        // Both vectors are empty: avoid 0 / 0.0 producing NaN.
        return 0.0;
    }
    return overlaps / (double) union;
}
public double compare( TIntFloatMap v1, TIntFloatMap v2, boolean sorted) { //it does not matter if the vectors are sorted or not double DKL = 0.0; TIntFloatIterator iter = v1.iterator(); while (iter.hasNext()) { iter.advance(); int key = iter.key(); if (!v2.containsKey(key)) { continue; } double P = iter.value(); double Q = v2.get(key); DKL += Math.log(P/Q) * P; } return DKL; }
public static int[] getSortedIndices(TIntFloatMap vector) { // NOTE: it's probably possible to do this using purely primitive // operations without having to resort to pushing things into an // Index[]. However, this code is much cleaner to have and since we // sort at most once per vector and the result is memoized, we don't // lose too much from the Object-based sorting. Index[] keyValPairs = new Index[vector.size()]; TIntFloatIterator iter = vector.iterator(); int i = 0; while (iter.hasNext()) { iter.advance(); keyValPairs[i++] = new Index(iter.key(), iter.value()); } Arrays.sort(keyValPairs); int[] sortedIndices = new int[keyValPairs.length]; for (i = 0; i < keyValPairs.length; ++i) sortedIndices[i] = keyValPairs[i].key; return sortedIndices; }
/**
 * Normalizes the values in a vector so that they sum to 1.0.
 *
 * @param vector the vector to normalize; it is not modified
 * @return a new map with every value divided by the sum of all values. If
 *         that sum is zero the values are copied unscaled, because dividing
 *         by zero would turn every entry into NaN or Infinity
 */
public static TIntFloatMap normalizeVector(TIntFloatMap vector) {
    float total = 0;

    TFloatIterator iter = vector.valueCollection().iterator();
    while (iter.hasNext()) {
        total += iter.next();
    }

    // A zero total cannot be normalized; keep the values as they are.
    final boolean scalable = total != 0;

    TIntFloatMap normalized = new TIntFloatHashMap(vector.size());

    TIntFloatIterator iter2 = vector.iterator();
    while (iter2.hasNext()) {
        iter2.advance();
        float value = iter2.value();
        normalized.put(iter2.key(), scalable ? value / total : value);
    }

    return normalized;
}
/**
 * Checks that getSortedIndices returns the keys in the order
 * 1, 2, 3, 0 for the values 10, 5, 2, 1.
 */
@Test
public void testGetSortedIndices() {
    final TIntFloatMap vector = new TIntFloatHashMap();
    vector.put(0, 1f);
    vector.put(1, 10f);
    vector.put(2, 5f);
    vector.put(3, 2f);

    final int[] actual = SemSigUtils.getSortedIndices(vector);

    final int[] expected = {1, 2, 3, 0};
    assertEquals(expected.length, actual.length);
    for (int rank = 0; rank < expected.length; rank++) {
        assertEquals(expected[rank], actual[rank]);
    }
}
/** {@inheritDoc} */
public void putAll( TIntFloatMap map ) {
    // Request capacity for the incoming entries before copying them.
    ensureCapacity( map.size() );

    // Copy every (key, value) pair through put().
    for ( TIntFloatIterator entries = map.iterator(); entries.hasNext(); ) {
        entries.advance();
        put( entries.key(), entries.value() );
    }
}
/**
 * {@inheritDoc}
 * <p>
 * Two maps are equal when they have the same size and every key stored
 * here is also present in the other map with the same value.
 */
@Override
public boolean equals( Object other ) {
    if ( ! ( other instanceof TIntFloatMap ) ) {
        return false;
    }
    TIntFloatMap that = ( TIntFloatMap ) other;
    if ( that.size() != this.size() ) {
        return false;
    }

    float[] values = _values;
    byte[] states = _states;
    for ( int i = values.length; i-- > 0; ) {
        if ( states[i] == FULL ) {
            int key = _set[i];

            // that.get( key ) alone cannot distinguish a missing key from
            // a stored no-entry value, so test for presence explicitly.
            // Without this, same-sized maps with different keys compared
            // as equal.
            if ( !that.containsKey( key ) ) {
                return false;
            }

            // The key exists on both sides, so the values must match
            // exactly; a stored no-entry value gets no special treatment.
            if ( values[i] != that.get( key ) ) {
                return false;
            }
        }
    }
    return true;
}
public void readExternal( ObjectInput in ) throws IOException, ClassNotFoundException { // VERSION in.readByte(); // MAP _map = ( TIntFloatMap ) in.readObject(); }
/** {@inheritDoc} */
@Override
public void putAll( TIntFloatMap map ) {
    // Request capacity for the incoming entries before copying them.
    ensureCapacity( map.size() );

    // Walk the source map and insert each pair through put().
    for ( TIntFloatIterator cursor = map.iterator(); cursor.hasNext(); ) {
        cursor.advance();
        put( cursor.key(), cursor.value() );
    }
}
/**
 * {@inheritDoc}
 * <p>
 * Two maps are equal when they have the same size and every key stored
 * here is also present in the other map with the same value.
 */
@Override
public boolean equals( Object other ) {
    if ( ! ( other instanceof TIntFloatMap ) ) {
        return false;
    }
    TIntFloatMap that = ( TIntFloatMap ) other;
    if ( that.size() != this.size() ) {
        return false;
    }

    TFloatOffheapArray values = _values;
    TByteOffheapArray states = _states;
    for ( int i = capacity(); i-- > 0; ) {
        if ( states.get( i ) == FULL ) {
            int key = _set.get( i );

            // that.get( key ) alone cannot distinguish a missing key from
            // a stored no-entry value, so test for presence explicitly.
            // Without this, same-sized maps with different keys compared
            // as equal.
            if ( !that.containsKey( key ) ) {
                return false;
            }

            // The key exists on both sides, so the values must match
            // exactly; a stored no-entry value gets no special treatment.
            if ( values.get( i ) != that.get( key ) ) {
                return false;
            }
        }
    }
    return true;
}
@Override public void readExternal( ObjectInput in ) throws IOException, ClassNotFoundException { // VERSION in.readByte(); // MAP _map = ( TIntFloatMap ) in.readObject(); }
/**
 * Computes the cosine similarity between the outlink vectors of two pages.
 *
 * @param pageId1 id of the first page
 * @param pageId2 id of the second page
 * @return the cosine similarity of the two outlink vectors, or 0.0 when
 *         either vector is empty
 */
public double cosineOutlink(int pageId1, int pageId2) {
    final TIntSet firstLinks = linkCache.getOutlinks(pageId1);
    final TIntSet secondLinks = linkCache.getOutlinks(pageId2);

    final TIntFloatMap firstVector = makeOutlinkVector(firstLinks);
    final TIntFloatMap secondVector = makeOutlinkVector(secondLinks);

    // An empty vector cannot be compared; report zero similarity.
    return (firstVector.isEmpty() || secondVector.isEmpty())
            ? 0.0
            : SimUtils.cosineSimilarity(firstVector, secondVector);
}