/**
 * Builds a new sparse vector that keeps only the entries of this vector whose
 * absolute value is strictly greater than {@code x}.
 *
 * @param x magnitude threshold; entries with {@code |value| <= x} are left out
 * @return a new vector created with this vector's {@code dim} and given the same
 *         matrix id, row id and clock
 */
@Override
public TFloatVector filter(float x) {
    SparseFloatVector filtered = new SparseFloatVector(dim);
    filtered.setMatrixId(matrixId).setRowId(rowId).setClock(clock);

    for (ObjectIterator<Int2FloatMap.Entry> it = hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry current = it.next();
        float v = current.getFloatValue();
        if (Math.abs(v) > x) {
            filtered.set(current.getIntKey(), v);
        }
    }
    return filtered;
}
/**
 * Writes this vector to {@code output}: first whatever the superclass writes,
 * then the number of stored entries, then each entry as an int index followed
 * by a float value. The read lock is held for the whole write.
 *
 * @param output stream to write to
 * @throws IOException if writing to the stream fails
 */
@Override
public void writeTo(DataOutputStream output) throws IOException {
    // Acquire the lock before entering try: if lock() itself failed, the finally
    // block must not call unlock() on a lock this thread does not hold.
    lock.readLock().lock();
    try {
        super.writeTo(output);
        output.writeInt(hashMap.size());

        ObjectIterator<Int2FloatMap.Entry> iter = hashMap.int2FloatEntrySet().fastIterator();
        while (iter.hasNext()) {
            Int2FloatMap.Entry entry = iter.next();
            output.writeInt(entry.getIntKey());
            output.writeFloat(entry.getFloatValue());
        }
    } finally {
        lock.readLock().unlock();
    }
}
/**
 * Serializes this vector into {@code buf}: first whatever the superclass
 * serializes, then the number of stored entries, then each entry as an int
 * index followed by a float value. The read lock is held for the whole write.
 *
 * @param buf buffer to write to
 */
@Override
public void serialize(ByteBuf buf) {
    // Acquire the lock before entering try: if lock() itself failed, the finally
    // block must not call unlock() on a lock this thread does not hold.
    lock.readLock().lock();
    try {
        super.serialize(buf);
        buf.writeInt(hashMap.size());

        ObjectIterator<Int2FloatMap.Entry> iter = hashMap.int2FloatEntrySet().fastIterator();
        while (iter.hasNext()) {
            Int2FloatMap.Entry entry = iter.next();
            buf.writeInt(entry.getIntKey());
            buf.writeFloat(entry.getFloatValue());
        }
    } finally {
        lock.readLock().unlock();
    }
}
/**
 * Merges this sparse float vector split into an index/value array pair.
 * At most {@code min(len, number of stored entries)} entries are written,
 * in the iteration order of the underlying map.
 *
 * @param indexes index array
 * @param values value array
 * @param startPos write start position of the index/value array
 * @param len write length; nothing is written when it is zero or negative
 */
public void mergeTo(int[] indexes, float[] values, int startPos, int len) {
    // Acquire the lock before entering try: if lock() itself failed, the finally
    // block must not call unlock() on a lock this thread does not hold.
    lock.readLock().lock();
    try {
        int writeLen = Math.min(len, hashMap.size());
        // "<= 0" rather than "== 0": with a negative length the loop's
        // "index == writeLen" stop condition would never fire and every entry
        // would be written regardless of the requested length.
        if (writeLen <= 0) {
            return;
        }

        int index = 0;
        for (Int2FloatMap.Entry entry : hashMap.int2FloatEntrySet()) {
            indexes[startPos + index] = entry.getIntKey();
            values[startPos + index] = entry.getFloatValue();
            index++;
            if (index == writeLen) {
                return;
            }
        }
    } finally {
        lock.readLock().unlock();
    }
}
/**
 * Computes the weighted sum of a user's entries in {@code knnItems}, using the
 * weights stored at row {@code itemI} of {@code weightMatrix}. The entry whose
 * key equals {@code excludeIndex} is skipped.
 *
 * @param user key looked up in {@code knnItems}
 * @param itemI row index used when reading {@code weightMatrix}
 * @param knnItems map from user to that user's (item, value) entries
 * @param excludeIndex item key that must not contribute to the sum
 * @param weightMatrix weights; a missing cell is read as {@code 0.d}
 * @return the accumulated sum, or {@code 0.d} when the user has no entry in {@code knnItems}
 */
private static double predict(final int user, final int itemI,
        @Nonnull final Int2ObjectMap<Int2FloatMap> knnItems, final int excludeIndex,
        @Nonnull final FloatMatrix weightMatrix) {
    final Int2FloatMap userRatings = knnItems.get(user);
    if (userRatings == null) {
        return 0.d;
    }

    double sum = 0.d;
    for (Int2FloatMap.Entry rating : Fastutil.fastIterable(userRatings)) {
        final int itemK = rating.getIntKey();
        if (itemK != excludeIndex) {
            final float ruk = rating.getFloatValue();
            sum += ruk * weightMatrix.get(itemI, itemK, 0.d);
        }
    }
    return sum;
}
/**
 * Decodes one training record from {@code buf} and trains on it.
 *
 * <p>Layout read from the buffer: item id, number of users, then for each user
 * its id, its entry count, and that many (int item, float value) pairs. After
 * decoding, {@code train} is invoked once for every distinct item id seen in
 * the pairs.
 *
 * @param buf buffer positioned at the start of a record
 */
private void replayTrain(@Nonnull final ByteBuffer buf) {
    final int itemI = buf.getInt();
    final int numUsers = buf.getInt();

    final Int2ObjectMap<Int2FloatMap> knnItems = new Int2ObjectOpenHashMap<>(1024);
    final IntSet pairItems = new IntOpenHashSet();

    for (int u = 0; u < numUsers; u++) {
        final int user = buf.getInt();
        final int numRatings = buf.getInt();

        final Int2FloatMap ratings = new Int2FloatOpenHashMap(numRatings);
        ratings.defaultReturnValue(0.f);
        for (int r = 0; r < numRatings; r++) {
            final int itemK = buf.getInt();
            final float ruk = buf.getFloat();
            pairItems.add(itemK);
            ratings.put(itemK, ruk);
        }
        knnItems.put(user, ratings);
    }

    for (int itemJ : pairItems) {
        train(itemI, knnItems, itemJ);
    }
}
/**
 * Converts a Hive map-of-maps object into an {@code Int2ObjectMap<Int2FloatMap>}.
 *
 * @param kNNiObj the Hive object holding the outer map
 * @param knnItemsOI inspector for the outer map
 * @param knnItemsKeyOI inspector for the outer map's keys (read as int)
 * @param knnItemsValueOI inspector for the inner maps
 * @param knnItemsValueKeyOI inspector for the inner maps' keys
 * @param knnItemsValueValueOI inspector for the inner maps' values
 * @param knnItems map to reuse; cleared and refilled when non-null, otherwise a
 *        new map is allocated
 * @param nnzKNNi receives the total number of entries over all inner maps
 * @return the filled map (the passed-in instance when one was supplied)
 */
@Nonnull
private static Int2ObjectMap<Int2FloatMap> kNNentries(@Nonnull final Object kNNiObj,
        @Nonnull final MapObjectInspector knnItemsOI,
        @Nonnull final PrimitiveObjectInspector knnItemsKeyOI,
        @Nonnull final MapObjectInspector knnItemsValueOI,
        @Nonnull final PrimitiveObjectInspector knnItemsValueKeyOI,
        @Nonnull final PrimitiveObjectInspector knnItemsValueValueOI,
        @Nullable Int2ObjectMap<Int2FloatMap> knnItems, @Nonnull final MutableInt nnzKNNi) {
    final Int2ObjectMap<Int2FloatMap> target;
    if (knnItems != null) {
        knnItems.clear();
        target = knnItems;
    } else {
        target = new Int2ObjectOpenHashMap<>(1024);
    }

    int totalEntries = 0;
    for (Map.Entry<?, ?> outer : knnItemsOI.getMap(kNNiObj).entrySet()) {
        final int user = PrimitiveObjectInspectorUtils.getInt(outer.getKey(), knnItemsKeyOI);
        final Map<?, ?> inner = knnItemsValueOI.getMap(outer.getValue());
        final Int2FloatMap ru = int2floatMap(inner, knnItemsValueKeyOI, knnItemsValueValueOI);
        target.put(user, ru);
        totalEntries += ru.size();
    }

    nnzKNNi.setValue(totalEntries);
    return target;
}
/**
 * Converts a Hive map into an {@link Int2FloatMap}, dropping entries whose
 * value equals {@code 0.f}. The result's default return value is {@code 0.f}.
 *
 * @param map source map
 * @param keyOI inspector used to read keys as int
 * @param valueOI inspector used to read values as float
 * @return a new map containing only the entries whose value is not {@code 0.f}
 */
@Nonnull
private static Int2FloatMap int2floatMap(@Nonnull final Map<?, ?> map,
        @Nonnull final PrimitiveObjectInspector keyOI,
        @Nonnull final PrimitiveObjectInspector valueOI) {
    final Int2FloatMap converted = new Int2FloatOpenHashMap(map.size());
    converted.defaultReturnValue(0.f);

    for (Map.Entry<?, ?> e : map.entrySet()) {
        // The value is read first so that the key is only decoded for kept entries.
        final float value = PrimitiveObjectInspectorUtils.getFloat(e.getValue(), valueOI);
        if (value != 0.f) {
            final int key = PrimitiveObjectInspectorUtils.getInt(e.getKey(), keyOI);
            converted.put(key, value);
        }
    }
    return converted;
}
/**
 * Counts the stored entries whose value is not zero.
 *
 * @return the number of non-zero entries, or 0 when the backing map is null
 */
@Override
public long nonZeroNumber() {
    if (hashMap == null) {
        return 0L;
    }

    long count = 0L;
    for (ObjectIterator<Int2FloatMap.Entry> it = this.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        if (it.next().getFloatValue() != 0) {
            count++;
        }
    }
    return count;
}
/**
 * Adds this vector to a dense float vector without modifying either operand.
 * The result is a {@code DenseFloatVector} constructed from {@code other}, to
 * which every stored entry of this vector is added via {@code plusBy}.
 *
 * @param other the dense operand
 * @return the newly constructed dense vector
 */
private TVector plus(DenseFloatVector other) {
    DenseFloatVector result = new DenseFloatVector(other);
    for (ObjectIterator<Int2FloatMap.Entry> it = this.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.plusBy(e.getIntKey(), e.getFloatValue());
    }
    return result;
}
/**
 * Adds another sparse float vector to a clone of this vector.
 *
 * @param other the vector whose entries are added
 * @return the clone of this vector after every entry of {@code other} has been
 *         added to its map with {@code addTo}
 */
private TVector plus(SparseFloatVector other) {
    SparseFloatVector sum = (SparseFloatVector) this.clone();
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        sum.hashMap.addTo(e.getIntKey(), e.getFloatValue());
    }
    return sum;
}
/**
 * Computes {@code this + x * other} into a new dense vector: a
 * {@code DenseFloatVector} is constructed from {@code other}, scaled by
 * {@code x} with {@code timesBy}, and then every stored entry of this vector
 * is added to it via {@code plusBy}.
 *
 * @param other the dense operand
 * @param x factor applied to {@code other}
 * @return the newly constructed dense vector
 */
private TVector plus(DenseFloatVector other, float x) {
    DenseFloatVector result = new DenseFloatVector(other);
    result.timesBy(x);
    for (ObjectIterator<Int2FloatMap.Entry> it = this.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.plusBy(e.getIntKey(), e.getFloatValue());
    }
    return result;
}
/**
 * Computes {@code this + x * other} on a clone of this vector.
 *
 * @param other the vector whose entries are scaled and added
 * @param x factor applied to each value of {@code other}
 * @return the clone of this vector after the scaled entries have been added
 *         to its map with {@code addTo}
 */
private TVector plus(SparseFloatVector other, float x) {
    SparseFloatVector sum = (SparseFloatVector) this.clone();
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        sum.hashMap.addTo(e.getIntKey(), x * e.getFloatValue());
    }
    return sum;
}
/**
 * Adds every entry of {@code other} to this vector in place.
 *
 * @param other the vector whose entries are added
 * @return this vector
 */
private TVector plusBy(SparseFloatVector other) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        this.hashMap.addTo(e.getIntKey(), e.getFloatValue());
    }
    return this;
}
/**
 * Adds {@code x * other} to this vector in place.
 *
 * @param other the vector whose entries are scaled and added
 * @param x factor applied to each value of {@code other}
 * @return this vector
 */
public TFloatVector plusBy(SparseFloatVector other, float x) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        this.hashMap.addTo(e.getIntKey(), x * e.getFloatValue());
    }
    return this;
}
/**
 * Computes the sum of the squares of all stored values.
 *
 * @return the squared L2 norm, accumulated in double precision
 */
@Override
public double squaredNorm() {
    double sum = 0;
    // fastIterator() is sufficient here because no entry is kept past its iteration.
    ObjectIterator<Int2FloatMap.Entry> iter = hashMap.int2FloatEntrySet().fastIterator();
    while (iter.hasNext()) {
        // Widen to double BEFORE multiplying: a float * float product is evaluated
        // in float, which rounds the square to float precision and overflows to
        // Infinity for large magnitudes even though the accumulator is a double.
        double v = iter.next().getFloatValue();
        sum += v * v;
    }
    return sum;
}
/**
 * Returns a new sparse vector holding every stored entry of this vector
 * multiplied by {@code x}. This vector is not modified.
 *
 * @param x the scale factor
 * @return a new {@code SparseFloatVector} created with this vector's {@code dim}
 */
@Override
public TFloatVector times(float x) {
    SparseFloatVector scaled = new SparseFloatVector(this.dim);
    for (ObjectIterator<Int2FloatMap.Entry> it = this.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        scaled.hashMap.put(e.getIntKey(), x * e.getFloatValue());
    }
    return scaled;
}
/**
 * Multiplies every stored value of this vector by {@code x} in place.
 *
 * @param x the scale factor
 * @return this vector
 */
@Override
public TFloatVector timesBy(float x) {
    ObjectIterator<Int2FloatMap.Entry> iter = this.hashMap.int2FloatEntrySet().fastIterator();
    while (iter.hasNext()) {
        Int2FloatMap.Entry entry = iter.next();
        // Update through the entry instead of calling hashMap.put() on the map
        // that is being iterated: this avoids a second hash lookup per entry
        // and does not write to the map behind the iterator's back.
        entry.setValue(x * entry.getFloatValue());
    }
    return this;
}
/**
 * Computes the dot product of this vector's {@code values} array with a sparse
 * float vector. Only the indices stored in {@code other} contribute.
 *
 * @param other the sparse operand
 * @return the dot product, accumulated in double precision
 */
private double dot(SparseFloatVector other) {
    double ret = 0.0;
    ObjectIterator<Int2FloatMap.Entry> iter = other.hashMap.int2FloatEntrySet().fastIterator();
    while (iter.hasNext()) {
        Int2FloatMap.Entry entry = iter.next();
        // Widen one operand to double so the product is computed in double; a
        // float * float product would be rounded (and could overflow) in float
        // before being added to the double accumulator.
        ret += (double) values[entry.getIntKey()] * entry.getFloatValue();
    }
    return ret;
}
/**
 * Adds a sparse float vector to this vector without modifying this vector.
 * The result is a {@code DenseFloatVector} constructed from this vector, with
 * each entry of {@code other} added to the matching slot of its {@code values}
 * array.
 *
 * @param other the sparse operand
 * @return the newly constructed dense vector
 */
private TFloatVector plus(SparseFloatVector other) {
    DenseFloatVector result = new DenseFloatVector(this);
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.values[e.getIntKey()] += e.getFloatValue();
    }
    return result;
}
/**
 * Adds every entry of a sparse float vector to this vector's {@code values}
 * array in place.
 *
 * @param other the sparse operand
 * @return this vector
 */
private TFloatVector plusBy(SparseFloatVector other) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        values[e.getIntKey()] += e.getFloatValue();
    }
    return this;
}
/**
 * Adds {@code x * other} to this vector's {@code values} array in place.
 *
 * @param other the sparse operand
 * @param x factor applied to each value of {@code other}
 * @return this vector
 */
private TFloatVector plusBy(SparseFloatVector other, float x) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        values[e.getIntKey()] += x * e.getFloatValue();
    }
    return this;
}
/**
 * Computes {@code this + x * other} on a clone of this vector.
 *
 * @param other the sparse float operand
 * @param x factor applied to each value of {@code other}
 * @return the clone of this vector after the scaled entries have been added
 *         to its {@code values} array
 */
private TIntDoubleVector plus(SparseFloatVector other, double x) {
    DenseDoubleVector vector = this.clone();
    // Use fastIterator() like the sibling overloads: entries are consumed
    // immediately and never retained, so the per-entry allocation done by
    // iterator() is unnecessary.
    ObjectIterator<Int2FloatMap.Entry> iter = other.hashMap.int2FloatEntrySet().fastIterator();
    while (iter.hasNext()) {
        Int2FloatMap.Entry entry = iter.next();
        vector.values[entry.getIntKey()] += entry.getFloatValue() * x;
    }
    return vector;
}
/**
 * Adds a sparse float vector to a clone of this vector.
 *
 * @param other the sparse float operand
 * @return the clone of this vector after every entry of {@code other} has been
 *         added to its {@code values} array
 */
private TIntDoubleVector plus(SparseFloatVector other) {
    DenseDoubleVector result = this.clone();
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.values[e.getIntKey()] += e.getFloatValue();
    }
    return result;
}
/**
 * Adds {@code x * other} to this vector's {@code values} array in place.
 *
 * @param other the sparse float operand
 * @param x factor applied to each value of {@code other}
 * @return this vector
 */
private TIntDoubleVector plusBy(SparseFloatVector other, double x) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        values[e.getIntKey()] += e.getFloatValue() * x;
    }
    return this;
}
/**
 * Adds every entry of a sparse float vector to this vector's {@code values}
 * array in place.
 *
 * @param other the sparse float operand
 * @return this vector
 */
private TIntDoubleVector plusBy(SparseFloatVector other) {
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        values[e.getIntKey()] += e.getFloatValue();
    }
    return this;
}
/**
 * Computes {@code this + x * other} on a clone of this vector.
 *
 * @param other the sparse float operand
 * @param x factor applied to each value of {@code other}
 * @return the clone of this vector after the scaled entries have been added
 *         to its map with {@code addTo}
 */
private SparseDoubleVector plus(SparseFloatVector other, double x) {
    SparseDoubleVector result = (SparseDoubleVector) this.clone();
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.hashMap.addTo(e.getIntKey(), e.getFloatValue() * x);
    }
    return result;
}
/**
 * Adds a sparse float vector to a clone of this vector.
 *
 * @param other the sparse float operand
 * @return the clone of this vector after every entry of {@code other} has been
 *         added to its map with {@code addTo}
 */
private SparseDoubleVector plus(SparseFloatVector other) {
    SparseDoubleVector result = (SparseDoubleVector) this.clone();
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        result.hashMap.addTo(e.getIntKey(), e.getFloatValue());
    }
    return result;
}
/**
 * Adds {@code x * other} to this vector in place. {@code resize} is called
 * with {@code other.size()} before any entry is added.
 *
 * @param other the sparse float operand
 * @param x factor applied to each value of {@code other}
 * @return this vector
 */
private SparseDoubleVector plusBy(SparseFloatVector other, double x) {
    resize(other.size());
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        this.hashMap.addTo(e.getIntKey(), e.getFloatValue() * x);
    }
    return this;
}
/**
 * Adds every entry of a sparse float vector to this vector in place.
 * {@code resize} is called with {@code other.size()} before any entry is added.
 *
 * @param other the sparse float operand
 * @return this vector
 */
private SparseDoubleVector plusBy(SparseFloatVector other) {
    resize(other.size());
    for (ObjectIterator<Int2FloatMap.Entry> it = other.hashMap.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        this.hashMap.addTo(e.getIntKey(), e.getFloatValue());
    }
    return this;
}
/**
 * Serializes the split into {@code buf}: first whatever the superclass
 * serializes, then the split's size, then each entry of its index-to-value map
 * as an int index followed by a float value. The size is also logged at debug
 * level.
 *
 * @param buf buffer to write to
 */
@Override
public void serialize(ByteBuf buf) {
    super.serialize(buf);
    buf.writeInt(split.size());
    LOG.debug("double size = " + split.size());

    for (ObjectIterator<Int2FloatMap.Entry> it = split.getIndexToValueMap().int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        buf.writeInt(e.getIntKey());
        buf.writeFloat(e.getFloatValue());
    }
}
@Override public void process(@Nonnull Object[] args) throws HiveException { if (_weightMatrix == null) {// initialize variables this._weightMatrix = new DoKFloatMatrix(); if (numIterations >= 2) { this._dataMatrix = new DoKFloatMatrix(); } this._nnzKNNi = new MutableInt(); } final int itemI = PrimitiveObjectInspectorUtils.getInt(args[0], itemIOI); if (itemI != _previousItemId || _ri == null) { // cache Ri and kNNi this._ri = int2floatMap(itemI, riOI.getMap(args[1]), riKeyOI, riValueOI, _dataMatrix, _ri); this._kNNi = kNNentries(args[2], knnItemsOI, knnItemsKeyOI, knnItemsValueOI, knnItemsValueKeyOI, knnItemsValueValueOI, _kNNi, _nnzKNNi); final int numKNNItems = _nnzKNNi.getValue(); if (numIterations >= 2 && numKNNItems >= 1) { recordTrainingInput(itemI, _kNNi, numKNNItems); } this._previousItemId = itemI; } int itemJ = PrimitiveObjectInspectorUtils.getInt(args[3], itemJOI); Int2FloatMap rj = int2floatMap(itemJ, rjOI.getMap(args[4]), rjKeyOI, rjValueOI, _dataMatrix); train(itemI, _ri, _kNNi, itemJ, rj); _observedTrainingExamples++; }
private void train(final int itemI, @Nonnull final Int2FloatMap ri, @Nonnull final Int2ObjectMap<Int2FloatMap> kNNi, final int itemJ, @Nonnull final Int2FloatMap rj) { final FloatMatrix W = _weightMatrix; final int N = rj.size(); if (N == 0) { return; } double gradSum = 0.d; double rateSum = 0.d; double lossSum = 0.d; for (Int2FloatMap.Entry e : Fastutil.fastIterable(rj)) { int user = e.getIntKey(); double ruj = e.getFloatValue(); double rui = ri.get(user); // ri.getOrDefault(user, 0.f); double eui = rui - predict(user, itemI, kNNi, itemJ, W); gradSum += ruj * eui; rateSum += ruj * ruj; lossSum += eui * eui; } gradSum /= N; rateSum /= N; double wij = W.get(itemI, itemJ, 0.d); double loss = lossSum / N + 0.5d * l2 * wij * wij + l1 * wij; _cvState.incrLoss(loss); W.set(itemI, itemJ, getUpdateTerm(gradSum, rateSum, l1, l2)); }
/**
 * Performs one update of the weight at ({@code itemI}, {@code itemJ}) using
 * the values stored in {@code _dataMatrix}.
 *
 * <p>Every non-zero cell in row {@code itemJ} of the data matrix contributes a
 * residual between the data-matrix value at ({@code itemI}, user) and
 * {@code predict(...)} (with {@code itemJ} excluded). The gradient and rate
 * sums are averaged over {@code numColumns(itemJ)}, the loss is reported to
 * {@code _cvState}, and the new weight is obtained from {@code getUpdateTerm}.
 * Nothing happens when that column count is zero.
 *
 * @param itemI row index of the weight being updated
 * @param knnItems per-user kNN entries passed through to {@code predict}
 * @param itemJ column index of the weight being updated
 */
private void train(final int itemI, @Nonnull final Int2ObjectMap<Int2FloatMap> knnItems,
        final int itemJ) {
    final FloatMatrix data = _dataMatrix;
    final FloatMatrix weights = _weightMatrix;

    final int numEntries = data.numColumns(itemJ);
    if (numEntries == 0) {
        return;
    }

    // Mutable holders because the callback below cannot assign to locals.
    final MutableDouble gradAcc = new MutableDouble(0.d);
    final MutableDouble rateAcc = new MutableDouble(0.d);
    final MutableDouble lossAcc = new MutableDouble(0.d);

    data.eachNonZeroInRow(itemJ, new VectorProcedure() {
        @Override
        public void apply(int user, double ruj) {
            final double rui = data.get(itemI, user, 0.d);
            final double eui = rui - predict(user, itemI, knnItems, itemJ, weights);
            gradAcc.addValue(ruj * eui);
            rateAcc.addValue(ruj * ruj);
            lossAcc.addValue(eui * eui);
        }
    });

    final double gradMean = gradAcc.getValue() / numEntries;
    final double rateMean = rateAcc.getValue() / numEntries;

    final double wij = weights.get(itemI, itemJ, 0.d);
    final double loss = lossAcc.getValue() / numEntries + 0.5 * l2 * wij * wij + l1 * wij;
    _cvState.incrLoss(loss);

    weights.set(itemI, itemJ, getUpdateTerm(gradMean, rateMean, l1, l2));
}
/**
 * Returns an iterable over the entries of {@code map} that uses the entry
 * set's {@code fastIterator()} when the set is an
 * {@code Int2FloatMap.FastEntrySet}; otherwise the entry set itself is
 * returned.
 *
 * @param map the map to iterate over
 * @return an iterable over the map's entries
 */
@Nonnull
public static ObjectIterable<Int2FloatMap.Entry> fastIterable(@Nonnull final Int2FloatMap map) {
    final ObjectSet<Int2FloatMap.Entry> entries = map.int2FloatEntrySet();
    if (!(entries instanceof Int2FloatMap.FastEntrySet)) {
        return entries;
    }

    final Int2FloatMap.FastEntrySet fastEntries = (Int2FloatMap.FastEntrySet) entries;
    return new ObjectIterable<Int2FloatMap.Entry>() {
        public ObjectIterator<Int2FloatMap.Entry> iterator() {
            return fastEntries.fastIterator();
        }
    };
}
/** * Create a SparseFloatVector consisting of double values according to the * specified mapping of indices and values. * * @param values the values to be set as values of the real vector * @param dimensionality the dimensionality of this feature vector * @throws IllegalArgumentException if the given dimensionality is too small * to cover the given values (i.e., the maximum index of any value not * zero is bigger than the given dimensionality) */ public SparseFloatVector(Int2FloatOpenHashMap values, int dimensionality) throws IllegalArgumentException { if(values.size() > dimensionality) { throw new IllegalArgumentException("values.size() > dimensionality!"); } this.indexes = new int[values.size()]; this.values = new float[values.size()]; // Import and sort the indexes { ObjectIterator<Int2FloatMap.Entry> iter = values.int2FloatEntrySet().fastIterator(); for(int i = 0; iter.hasNext(); i++) { this.indexes[i] = iter.next().getIntKey(); } Arrays.sort(this.indexes); } // Import the values accordingly { for(int i = 0; i < values.size(); i++) { this.values[i] = values.get(this.indexes[i]); } } this.dimensionality = dimensionality; final int maxdim = getMaxDim(); if(maxdim > dimensionality) { throw new IllegalArgumentException("Given dimensionality " + dimensionality + " is too small w.r.t. the given values (occurring maximum: " + maxdim + ")."); } }
/**
 * Writes the map stored for {@code partitionId} to {@code out}: the entry
 * count, followed by each entry as an int key and a float value.
 *
 * @param out destination
 * @param partitionId key used to look up the partition's map
 * @throws IOException if writing fails
 */
@Override
public void writePartition(DataOutput out, int partitionId) throws IOException {
    Int2FloatOpenHashMap entries = map.get(partitionId);
    out.writeInt(entries.size());

    for (ObjectIterator<Int2FloatMap.Entry> it = entries.int2FloatEntrySet().fastIterator(); it.hasNext();) {
        Int2FloatMap.Entry e = it.next();
        out.writeInt(e.getIntKey());
        out.writeFloat(e.getFloatValue());
    }
}
private void recordTrainingInput(final int itemI, @Nonnull final Int2ObjectMap<Int2FloatMap> knnItems, final int numKNNItems) throws HiveException { ByteBuffer buf = this._inputBuf; NioStatefullSegment dst = this._fileIO; if (buf == null) { // invoke only at task node (initialize is also invoked in compilation) final File file; try { file = File.createTempFile("hivemall_slim", ".sgmt"); // to save KNN data file.deleteOnExit(); if (!file.canWrite()) { throw new UDFArgumentException("Cannot write a temporary file: " + file.getAbsolutePath()); } } catch (IOException ioe) { throw new UDFArgumentException(ioe); } this._inputBuf = buf = ByteBuffer.allocateDirect(8 * 1024 * 1024); // 8MB this._fileIO = dst = new NioStatefullSegment(file, false); } int recordBytes = SizeOf.INT + SizeOf.INT + SizeOf.INT * 2 * knnItems.size() + (SizeOf.INT + SizeOf.FLOAT) * numKNNItems; int requiredBytes = SizeOf.INT + recordBytes; // need to allocate space for "recordBytes" itself int remain = buf.remaining(); if (remain < requiredBytes) { writeBuffer(buf, dst); } buf.putInt(recordBytes); buf.putInt(itemI); buf.putInt(knnItems.size()); for (Int2ObjectMap.Entry<Int2FloatMap> e1 : Fastutil.fastIterable(knnItems)) { int user = e1.getIntKey(); buf.putInt(user); Int2FloatMap ru = e1.getValue(); buf.putInt(ru.size()); for (Int2FloatMap.Entry e2 : Fastutil.fastIterable(ru)) { buf.putInt(e2.getIntKey()); buf.putFloat(e2.getFloatValue()); } } }
/**
 * Convenience overload that forwards to the six-argument {@code int2floatMap},
 * passing {@code null} as the last argument.
 *
 * @param item forwarded unchanged
 * @param map forwarded unchanged
 * @param keyOI forwarded unchanged
 * @param valueOI forwarded unchanged
 * @param dataMatrix forwarded unchanged; may be {@code null}
 * @return whatever the six-argument overload returns
 */
@Nonnull private static Int2FloatMap int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix) { return int2floatMap(item, map, keyOI, valueOI, dataMatrix, null); }
public static Int2FloatMap int2FloatMap() { return new Int2FloatOpenHashMap(); // return new Int2FloatAVLTreeMap(); }