public void phase3(int id) { Map<Integer, Int2LongMap> h = JCL_FacadeImpl.GetHashMap(String.valueOf(id)); Int2LongMap result = new Int2LongOpenHashMap(); for (Int2LongMap m : h.values()) { for (int i : m.keySet()) { if (result.containsKey(i)) { long j = m.get(i) + result.get(i); result.put(i, j); } else { result.put(i, m.get(i)); } } m.clear(); } long freqT = 0; for (Long v : result.values()) freqT += v; System.err.println("ID: " + id + " size: " + result.size() + " freqT: " + freqT); h.clear(); h.put(id, result); // result.clear(); // result = null; }
/**
 * Find the parent entity for each.
 *
 * <p>Entry 0 is its own parent and gets the count {@code Long.MAX_VALUE}. For
 * every other index {@code i}, the parent is the key of {@code parents[i]}
 * with the largest count; on equal counts the smaller key wins, so the result
 * does not depend on the iteration order of the hash map. {@code cnt[i]}
 * receives the sum of all counts in {@code parents[i]}. If {@code parents[i]}
 * is empty, {@code i} remains its own parent and {@code cnt[i]} is 0.
 *
 * @param parents Parent counter map
 * @param parent Parent storage (output)
 * @param cnt Count storage (output)
 */
private void findParent(Int2LongOpenHashMap[] parents, int[] parent, long[] cnt) {
    parent[0] = 0;
    cnt[0] = Long.MAX_VALUE;
    for(int i = 1; i < parents.length; i++) {
        int best = i;
        long total = 0, bcount = -1;
        for(Int2LongMap.Entry p : parents[i].int2LongEntrySet()) {
            final int key = p.getIntKey();
            final long c = p.getLongValue();
            total += c;
            // Tie-break on the candidate key (not on the count) so that equal
            // counts resolve deterministically to the smaller key.
            if(c > bcount || (c == bcount && key < best)) {
                bcount = c;
                best = key;
            }
        }
        cnt[i] = total;
        parent[i] = best;
    }
}
/**
 * Returns an iterable over the entries of the given map that uses the entry
 * set's {@code fastIterator()} whenever the entry set is a
 * {@link Int2LongMap.FastEntrySet}; otherwise the plain entry set is returned.
 *
 * <p>NOTE: according to the fastutil documentation a fast iterator may hand
 * out the same mutable entry object on every step, so callers should not keep
 * references to entries across iterations.
 *
 * @param map the map whose entries are to be iterated
 * @return an iterable over the map's entries
 */
@Nonnull
public static ObjectIterable<Int2LongMap.Entry> fastIterable(@Nonnull final Int2LongMap map) {
    final ObjectSet<Int2LongMap.Entry> entries = map.int2LongEntrySet();
    if (!(entries instanceof Int2LongMap.FastEntrySet)) {
        // No fast iteration available; the entry set itself is the iterable.
        return entries;
    }

    final Int2LongMap.FastEntrySet fastEntries = (Int2LongMap.FastEntrySet) entries;
    return new ObjectIterable<Int2LongMap.Entry>() {
        @Override
        public ObjectIterator<Int2LongMap.Entry> iterator() {
            return fastEntries.fastIterator();
        }
    };
}
/**
 * Builds the key-range partition schema from the partial results.
 *
 * <p>Every partial result is expected to carry a list of {@code "key:freq"}
 * strings. Frequencies of equal keys are summed, and every incoming frequency
 * is counted exactly once towards the overall total. The keys are then walked
 * in ascending order while accumulating their frequencies; whenever the
 * accumulated load exceeds {@code total / numOfJCLThreads}, the current key is
 * emitted as a boundary and the load is reset.
 *
 * <p>A partial result that cannot be read or parsed is abandoned from the
 * failing element on (entries parsed before the failure are kept); the problem
 * is reported on {@code System.err} and processing continues with the next
 * partial result.
 *
 * @param r partial results, each holding a {@code List<String>} of
 *          {@code "key:freq"} items
 * @param numOfJCLThreads number of partitions the total load is divided by
 * @return the boundary keys in ascending order, each followed by {@code ':'};
 *         empty when no boundary was emitted
 */
private String buildInputDataPartitionSchema(List<JCL_result> r, int numOfJCLThreads) {
    IntSet sorted = new IntAVLTreeSet();
    Int2LongMap map = new Int2LongOpenHashMap();
    long totalF = 0;

    for (JCL_result oneR : r) {
        try {
            @SuppressWarnings("unchecked")
            List<String> l = (List<String>) oneR.getCorrectResult();
            for (String s : l) {
                String[] args = s.split(":");
                int key = Integer.parseInt(args[0]);
                long freq = Long.parseLong(args[1]);
                sorted.add(key);
                // Count the incoming frequency once, whether or not the key
                // has been seen before.
                totalF += freq;
                if (map.containsKey(key)) {
                    freq += map.get(key);
                }
                map.put(key, freq);
            }
        } catch (Exception e) {
            // Best effort: keep what was parsed so far, but do not hide the problem.
            System.err.println("buildInputDataPartitionSchema: skipping rest of a partial result: " + e);
        }
    }

    StringBuilder result = new StringBuilder();
    long load = 0;
    for (int ac : sorted) {
        load += map.get(ac);
        if (load > (totalF / numOfJCLThreads)) {
            result.append(ac).append(':');
            load = 0;
        }
    }
    return result.toString();
}
@Override public Iterator<GroupStats> getGroupStats(final EZImhotepSession session, final Map<Integer, GroupKey> groupKeys, final List<StatReference> statRefs, final long timeoutTS) throws ImhotepOutOfMemoryException { if(groupKeys.isEmpty()) { // we don't have any parent groups probably because all docs were filtered out return Collections.<GroupStats>emptyList().iterator(); } final StatReference countStatRef = session.pushStat(countStat); final long[] counts = getCounts(countStatRef); final Int2ObjectMap<Int2LongMap> groupToPositionToStats = getPercentileStats(session, groupKeys, countStatRef, counts); final List<GroupStats> result = Lists.newArrayList(); final int statCount = statRefs.size(); final int groupCount = session.getNumGroups(); // get values for the normal stats final TIntObjectHashMap<double[]> statsResults = (statCount > 0) ? getGroupStatsValues(session, statRefs, groupCount) : null; // combine normal stats with distinct counts for (int groupNum = 1; groupNum < groupCount; groupNum++) { final Int2LongMap groupPercentileData = groupToPositionToStats.get(groupNum); double[] statsVals = statsResults != null ? statsResults.get(groupNum) : null; double[] values = new double[statCount + fields.size()]; for(int i = 0, statsValsIndex = 0; i < values.length; i++) { if(groupPercentileData != null && groupPercentileData.containsKey(i)) { // percentile value values[i] = groupPercentileData.get(i); } else if(statsVals != null && statsValsIndex < statsVals.length) { values[i] = statsVals[statsValsIndex++]; // normal stat value available } else { values[i] = 0; // normal stat not in stats array } } GroupKey groupKey = groupKeys.get(groupNum); result.add(new GroupStats(groupKey, values)); } return result.iterator(); }
@Override protected void forwardModel() throws HiveException { this._model = null; this._fieldList = null; this._sumVfX = null; final int factors = _factors; final IntWritable idx = new IntWritable(); final FloatWritable Wi = new FloatWritable(0.f); final FloatWritable[] Vi = HiveUtils.newFloatArray(factors, 0.f); final List<FloatWritable> ViObj = Arrays.asList(Vi); final Object[] forwardObjs = new Object[4]; String modelId = HadoopUtils.getUniqueTaskIdString(); forwardObjs[0] = new Text(modelId); forwardObjs[1] = idx; forwardObjs[2] = Wi; forwardObjs[3] = null; // Vi // W0 idx.set(0); Wi.set(_ffmModel.getW0()); forward(forwardObjs); final Entry entryW = new Entry(_ffmModel._buf, 1); final Entry entryV = new Entry(_ffmModel._buf, _ffmModel._factor); final float[] Vf = new float[factors]; for (Int2LongMap.Entry e : Fastutil.fastIterable(_ffmModel._map)) { // set i final int i = e.getIntKey(); idx.set(i); final long offset = e.getLongValue(); if (Entry.isEntryW(i)) {// set Wi entryW.setOffset(offset); float w = entryV.getW(); if (w == 0.f) { continue; // skip w_i=0 } Wi.set(w); forwardObjs[2] = Wi; forwardObjs[3] = null; } else {// set Vif entryV.setOffset(offset); entryV.getV(Vf); for (int f = 0; f < factors; f++) { Vi[f].set(Vf[f]); } forwardObjs[2] = null; forwardObjs[3] = ViObj; } forward(forwardObjs); } }
public List<String> phase1(int id, String name, int numJCLThreads) { Int2LongMap values = new Int2LongOpenHashMap(1000000); long totalF = 0; System.err.println("file: " + name); try { File f = new File("../" + name + "/" + name + ".bin"); InputStream in = new BufferedInputStream(new FileInputStream(f)); FastBufferedInputStream fb = new FastBufferedInputStream(in); byte[] i = new byte[4]; while (fb.read(i) == 4) { int k = java.nio.ByteBuffer.wrap(i).getInt(); if (!values.containsKey(k)) values.put(k, 1); else { long aux = values.get(k); aux++; values.put(k, aux); } totalF++; } fb.close(); in.close(); // primeira modificacao //for (long v : values.values()) // totalF += v; IntSet sorted = new IntAVLTreeSet(values.keySet()); long acumula = 0; int b = 0; List<String> result = new LinkedList<>(); long blinha = 0; int last = 0; for (int ac : sorted) { blinha = values.get(ac); acumula += blinha; if (acumula > (totalF / (numJCLThreads))) { b = ac; result.add(b + ":" + acumula); acumula = 0; } last = ac; } // segunda modificacao if(acumula != 0) result.add(last + ":" + acumula); JCL_facade jcl = JCL_FacadeImpl.getInstanceLambari(); jcl.instantiateGlobalVar(id, values); sorted.clear(); sorted = null; return result; } catch (Exception e) { e.printStackTrace(); return null; } }
/**
 * Splits the frequency map stored as JCL global variable {@code id} into
 * per-thread maps according to {@code schema} and publishes every non-empty
 * per-thread map in the JCL hash map named after the thread index.
 *
 * <p>{@code schema} is a {@code ':'}-separated list of integer boundaries
 * {@code b0, b1, ..., bm}. A key {@code k} is routed as follows:
 * <ul>
 * <li>{@code k >= bm} (or no boundary at all): bucket {@code numJCLThreads - 1}</li>
 * <li>{@code k < b0}: bucket 0</li>
 * <li>{@code b(j-1) <= k < bj}: bucket {@code j}</li>
 * </ul>
 * A key that matches none of these rules is not routed anywhere.
 *
 * <p>The global variable is deleted right after it has been fetched. The
 * boundaries are parsed once, before any key is routed; an empty schema is
 * treated as "no boundaries".
 *
 * @param id identifier of the global variable holding the frequency map; also
 *           the key under which each per-thread map is stored
 * @param numJCLThreads number of buckets
 * @param schema the boundary list
 * @throws NumberFormatException if a boundary is not a valid integer
 */
public void phase2(int id, int numJCLThreads, String schema) {
    JCL_facade jcl = JCL_FacadeImpl.getInstanceLambari();
    Int2LongMap sorted = (Int2LongMap) jcl.getValue(id).getCorrectResult();
    jcl.deleteGlobalVar(id);

    // Parse the boundaries once instead of once per key and comparison.
    String[] chunks = schema.isEmpty() ? new String[0] : schema.split(":");
    int[] bounds = new int[chunks.length];
    for (int c = 0; c < chunks.length; c++) {
        bounds[c] = Integer.parseInt(chunks[c]);
    }

    Int2LongMap[] finais = new Int2LongMap[numJCLThreads];
    for (int r = 0; r < numJCLThreads; r++) {
        finais[r] = new Int2LongOpenHashMap();
    }

    for (int key : sorted.keySet()) {
        int target = -1;
        if (bounds.length == 0 || key >= bounds[bounds.length - 1]) {
            // At or beyond the last boundary (or no boundaries): last bucket.
            target = numJCLThreads - 1;
        } else if (key < bounds[0]) {
            target = 0;
        } else {
            for (int k = 1; k < bounds.length; k++) {
                if (bounds[k - 1] <= key && key < bounds[k]) {
                    target = k;
                    break;
                }
            }
        }
        if (target >= 0) {
            finais[target].put(key, sorted.get(key));
        }
    }

    for (int r = 0; r < numJCLThreads; r++) {
        if (!finais[r].isEmpty()) {
            JCL_map<Integer, Int2LongMap> h = new JCLHashMap<>(String.valueOf(r));
            h.put(id, finais[r]);
            long fT = 0;
            for (long kk : finais[r].values()) {
                fT += kk;
            }
            System.err.println(finais[r].size() + ":" + fT + " phase 2 putting in jcl - thread id " + r);
            finais[r].clear();
            finais[r] = null;
        }
    }
    sorted.clear();
}
public static Int2LongMap int2LongMap() { return new Int2LongOpenHashMap(); // return new Int2LongAVLTreeMap(); }