Python numpy 模块,argpartition() 实例源码


项目:radar    作者:amoose136    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, labels, top_k):
  """Yield one tab-separated report line per video.

  Each line holds the decoded video id, ``1 - PERR``, the ``top_k``
  best-scoring classes and the labelled classes; the last two are flattened
  ``class<TAB>score`` pairs ordered by decreasing score.
  """

  def _largest(values, count):
    # Indices of the `count` largest entries, in no particular order.
    return numpy.argpartition(values, -count)[-count:]

  def _render(scores, class_ids):
    pairs = [(class_id, scores[class_id]) for class_id in class_ids]
    pairs.sort(key=lambda pair: -pair[1])
    return "\t".join("%d\t%f" % pair for pair in pairs)

  for row, raw_id in enumerate(video_ids):
    scores = predictions[row]
    truth = labels[row]
    # At least one slot, so a video without labels still produces a line.
    n_recall = max(int(numpy.sum(truth)), 1)

    label_str = _render(scores, _largest(truth, n_recall))
    top_k_str = _render(scores, _largest(scores, top_k))

    # PERR: share of the n_recall best-scored classes that are true labels.
    hits = sum(truth[class_id] for class_id in _largest(scores, n_recall))
    perr = hits / float(n_recall)

    url = "" + raw_id.decode('utf-8')
    yield "\t".join([url, str(1 - perr), top_k_str, label_str]) + "\n"
项目:cupy    作者:cupy    | 项目源码 | 文件源码
def argpartition(a, kth, axis=-1):
    """Returns the indices that would partially sort an array.

    Args:
        a (cupy.ndarray): Array to be sorted.
        kth (int or sequence of ints): Element index to partition by. If
            supplied with a sequence of k-th it will partition all elements
            indexed by k-th of them into their sorted position at once.
        axis (int or None): Axis along which to sort. Default is -1, which
            means sort along the last axis. If None is supplied, the array is
            flattened before sorting.

    Returns:
        cupy.ndarray: Array of the same type and shape as ``a``.

    .. note::
        For its implementation reason, `cupy.argpartition` fully sorts the
        given array as `cupy.argsort` does. It also does not support ``kind``
        and ``order`` parameters that ``numpy.argpartition`` supports.

    .. seealso:: :func:`numpy.argpartition`

    """
    # Thin wrapper: the work is done by the array's own method.
    return a.argpartition(kth, axis=axis)
项目:SlidingWindowVideoTDA    作者:ctralie    | 项目源码 | 文件源码
def CSMToBinary(D, Kappa):
    """
    Turn a cross-similarity matrix into a binary cross-simlarity matrix
    If Kappa = 0, take all neighbors
    If Kappa < 1 it is the fraction of mutual neighbors to consider
    Otherwise Kappa is the number of mutual neighbors to consider

    :param D: N x M matrix; the smallest entries of each row are kept
    :param Kappa: Neighbor fraction or neighbor count, as described above
    :returns: N x M float array with 1 at the selected entries of each
        row and 0 elsewhere
    """
    N = D.shape[0]
    M = D.shape[1]
    if Kappa == 0:
        return np.ones((N, M))
    elif Kappa < 1:
        NNeighbs = int(np.round(Kappa*M))
    else:
        # Cast so that a float count such as 3.0 can be used for slicing.
        NNeighbs = int(Kappa)
    if NNeighbs >= M:
        # Asking for every column; argpartition would reject kth == M.
        return np.ones((N, M))
    ret = np.zeros((N, M))
    if NNeighbs > 0:
        # Partitioning on NNeighbs - 1 puts the NNeighbs smallest entries of
        # each row into the first NNeighbs slots.
        J = np.argpartition(D, NNeighbs - 1, axis=1)[:, 0:NNeighbs]
        ret[np.arange(N)[:, None], J] = 1
    return ret
项目:DrQA    作者:facebookresearch    | 项目源码 | 文件源码
def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.

        Returns a ``(doc_ids, doc_scores)`` pair holding at most ``k``
        entries, ordered by decreasing score.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        # Scores are negated so that ascending order means best first.
        if len(res.data) <= k:
            # Fewer stored scores than requested: rank all of them.
            o_sort = np.argsort(-res.data)
        else:
            # Select the k best without a full sort, then order just those.
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores
项目:Y8M    作者:mpekalski    | 项目源码 | 文件源码
def bottom_top_k_along_row(arr, k, ordered=True):
    """ bottom and top k of a 2d np.array, along the rows

    Returns ``(vals, indices)``. Each row of ``indices`` holds the column
    indices of the k smallest entries followed by those of the k largest;
    ``vals`` holds the matching values. With ``ordered=True`` both halves
    are in ascending order; otherwise the order inside each half is
    unspecified.
    """
    assert k>0, "bottom_top_k_along_row/column() requires k>0."
    rows = arr.shape[0]
    if ordered:
        tmp = np.argsort(arr, axis=1)
        idx_bot = tmp[:, :k]
        idx_top = tmp[:,-k:]
    else:
        # kth = k - 1 is enough to place the k smallest first and, unlike
        # kth = k, stays in bounds when k equals the number of columns.
        idx_bot = np.argpartition(arr, k - 1, axis=1)[:,:k]
        idx_top = np.argpartition(arr, -k, axis=1)[:,-k:]

    indices = np.concatenate((idx_bot, idx_top), axis=1)
    # Pair every row number with that row's selected columns.
    vals = arr[np.arange(rows)[:, None], indices]
    return vals, indices
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def top_k_recommendations(self, sequence, k=10, exclude=None, **kwargs):
        """Return k item ids ranked from the counts stored for the last item.

        ``sequence`` is indexed as ``sequence[-1][0]`` to get the last item
        id. Items already in ``sequence`` and items in ``exclude`` get a
        count of zero before ranking. Extra keyword arguments are ignored.
        """
        if exclude is None:
            exclude = []

        last_item = int(sequence[-1][0])
        if last_item not in self.previous_recommendations:
            # NOTE(review): the body of this branch was missing from the
            # listing. It has to populate
            # self.previous_recommendations[last_item]; the method name is
            # reconstructed -- confirm against the class.
            self.get_all_recommendations(last_item)

        # Work on a copy so the cached counts are not modified.
        all_recommendations = deepcopy(self.previous_recommendations[last_item])
        for s in sequence:
            all_recommendations[int(s[0])] = 0
        for i in exclude:
            all_recommendations[i] = 0

        # Give the k most common items descending scores k, k-1, ...
        ranking = np.zeros(self.n_items)
        for i, x in enumerate(all_recommendations.most_common(k)):
            ranking[x[0]] = k-i
        # Partitioning on every index in range(k) sorts the first k slots.
        return np.argpartition(-ranking, range(k))[:k]
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        last_item = sequence[-1][0]
        # Score of every item = user/item term + previous-item/next-item term.
        # NOTE(review): the heads of both np.dot calls were stripped from the
        # listing; the attribute names V_user_item and V_prev_next are
        # reconstructed as the counterparts of V_item_user / V_next_prev --
        # confirm against the class.
        output = np.dot(self.V_user_item[user_id, :], self.V_item_user.T) + np.dot(self.V_prev_next[last_item, :], self.V_next_prev.T)

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        user_items = [i[0] for i in sequence]
        # One score per item, computed by the model from the seen items.
        output = self.item_score(user_id, user_items)

        # Put low similarity to viewed items to exclude them from recommendations
        output[user_items] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None):
        ''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        if exclude is None:
            exclude = []

        # Score of every item = item bias + user-factor / item-factor product.
        # NOTE(review): the head of the np.dot call was stripped from the
        # listing; the user-factor attribute name ``V`` is reconstructed --
        # confirm against the class.
        output = self.bias + np.dot(self.V[user_id, :], self.H.T)

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def top_k_recommendations(self, sequence, user_id=None, k=10, exclude=None, **kwargs):
        ''' Recieves a sequence of (id, rating), and produces k recommendations (as a list of ids)
        '''

        # Without this guard ``output[None] = -np.inf`` below would overwrite
        # every score, because indexing with None selects the whole array.
        if exclude is None:
            exclude = []

        # Compile network if needed
        if not hasattr(self, 'predict_function'):
            # NOTE(review): the body of this branch was missing from the
            # listing. It has to create self.predict_function; the method
            # name is reconstructed -- confirm against the class.
            self._compile_predict_function()

        # Prepare RNN input
        X = np.zeros((1, self._input_size())) # input of the RNN
        X[0, :] = self._one_hot_encoding([i[0] for i in sequence])

        # Run RNN
        output = self.predict_function(X.astype(theano.config.floatX))[0]

        # Put low similarity to viewed items to exclude them from recommendations
        output[[i[0] for i in sequence]] = -np.inf
        output[exclude] = -np.inf

        # find top k according to output
        return list(np.argpartition(-output, range(k))[:k])
项目:sequence-based-recommendations    作者:rdevooght    | 项目源码 | 文件源码
def _compile_test_function(self):
        ''' Differs from base test function because of the added softmax operation

        Builds a theano function for the deterministic network output and
        stores, as ``self.test_function``, a wrapper returning the k
        best-scoring ids of the first output row.
        '''
        print("Compiling test...")
        deterministic_output = T.nnet.softmax(lasagne.layers.get_output(self.l_out, deterministic=True))
        if self.interactions_are_unique:
            # Multiply by (1 - exclude) so entries flagged in self.exclude score zero.
            deterministic_output *= (1 - self.exclude)

        theano_test_function = theano.function(self.theano_inputs, deterministic_output, allow_input_downcast=True, name="Test_function", on_unused_input='ignore')

        def precision_test_function(theano_inputs, k=10):
            output = theano_test_function(*theano_inputs)
            # Partitioning on range(k) sorts the first k slots; keep row 0 only.
            ids = np.argpartition(-output, range(k), axis=-1)[0, :k]

            return ids

        self.test_function = precision_test_function
        print("Compilation done.")
项目:sockeye    作者:awslabs    | 项目源码 | 文件源码
def smallest_k(matrix: np.ndarray, k: int,
               only_first_row: bool = False) -> Tuple[Tuple[np.ndarray, np.ndarray], np.ndarray]:
    """
    Find the smallest elements in a numpy matrix.

    :param matrix: Any matrix.
    :param k: The number of smallest elements to return. Values larger than
        the number of searched elements return all of them.
    :param only_first_row: If true the search is constrained to the first row of the matrix.
    :return: The row indices, column indices and values of the k smallest items in matrix.
    """
    if only_first_row:
        flatten = matrix[:1, :].flatten()
    else:
        flatten = matrix.flatten()

    # argpartition rejects a kth outside the array, so cap k at the size.
    k = min(k, flatten.size)
    if k <= 0:
        args = np.empty(0, dtype=np.intp)
    else:
        # args are the indices in flatten of the k smallest elements
        args = np.argpartition(flatten, k - 1)[:k]
    # args are the indices in flatten of the sorted k smallest elements
    args = args[np.argsort(flatten[args])]
    # flatten[args] are the values for args
    return np.unravel_index(args, matrix.shape), flatten[args]
项目:krpcScripts    作者:jwvanderbeck    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:cebl    作者:idfah    | 项目源码 | 文件源码
def probs(self, x):
        """Return per-class vote fractions among the ``self.k`` nearest points.

        Distances from ``x`` to each class in ``self.trainData`` are computed
        with ``self.distFunc`` and stacked column-wise; the result has one
        row per row of that distance matrix and one column per class.
        """
        per_class_dists = [self.distFunc(x, cls) for cls in self.trainData]
        dists = np.hstack(per_class_dists)
        nearest = np.argpartition(dists, self.k, axis=1)[:, :self.k]

        # Each class owns the contiguous column range [start, end) of dists.
        sizes = np.array([len(cls) for cls in self.trainData])
        ends = np.cumsum(sizes)
        starts = ends - sizes

        votes = []
        for lo, hi in zip(starts, ends):
            in_class = np.logical_and(lo <= nearest, nearest < hi)
            votes.append(np.sum(in_class, axis=1))

        return np.vstack(votes).T / float(self.k)
项目:hyperstar    作者:nlpub    | 项目源码 | 文件源码
def argmaxk_rows_opt1(arr, k=10, sort=False):
    """
    Optimized implementation. When sort=False it is equal to argmaxk_rows_basic. When sort=True and k << arr.shape[1],
    it is should be faster, because we argsort only subarray of k max elements from each row of arr (arr.shape[0] x k) instead of
    the whole array arr (arr.shape[0] x arr.shape[1]).

    Returns an (m x k) array with the column indices of the k largest
    elements of each row; ordered by decreasing value only when sort=True.
    """
    best_inds = np.argpartition(arr, kth=-k, axis=1)[:, -k:]  # column indices of k max elements in each row (m x k)
    if not sort:
        return best_inds
    # generate row indices corresponding to best_ids (just current row id in each row) (m x k)
    rows = np.arange(best_inds.shape[0], dtype=np.intp)[:, np.newaxis].repeat(best_inds.shape[1], axis=1)
    best_elems = arr[rows, best_inds]  # select k max elements from each row using advanced indexing (m x k)
    # indices which sort each row of best_elems in descending order (m x k)
    best_elems_inds = np.argsort(best_elems, axis=1)[:, ::-1]
    # reorder best_indices so that arr[i, sorted_best_inds[i,:]] will be sorted in descending order
    sorted_best_inds = best_inds[rows, best_elems_inds]
    return sorted_best_inds
项目:semihin    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def generateCosineNeighborGraph(hin,kNeighbors=10,tf_param={'word':True, 'entity':False, 'we_weight':1}):
        """Build a symmetric neighbor-count graph from cosine similarity.

        For every row, the kNeighbors most similar rows (excluding the row
        itself) each add 1 to both graph[i, j] and graph[j, i].

        Returns the graph as a CSC sparse matrix together with ``newIds`` as
        produced by ``GraphGenerator.getTFVectorX``.
        """
        # NOTE: tf_param's default dict is shared between calls; it is only
        # passed through here, never modified.
        X, newIds, entIds = GraphGenerator.getTFVectorX(hin,param=tf_param)
        cosX = cosine_similarity(X)
        n = cosX.shape[0]
        graph = np.zeros((n,n))
        for i in range(n):
            # Negate so the smallest values are the most similar rows.
            for j in np.argpartition(-cosX[i],kNeighbors)[:kNeighbors]:
                if j == i:
                    # A row is not counted as its own neighbor.
                    continue
                graph[i, j] += 1
                graph[j, i] += 1

        return sparse.csc_matrix(graph), newIds
项目:semihin    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def generateCosineNeighborGraphfromX(X, kNeighbors=10):
        """Build a symmetric neighbor-count graph from the rows of X.

        For every row, the kNeighbors rows with the highest cosine similarity
        (excluding the row itself) each add 1 to both graph[i, j] and
        graph[j, i]. The graph is returned as a CSC sparse matrix.
        """
        cosX = cosine_similarity(X)
        n = cosX.shape[0]
        graph = np.zeros((n, n))
        for i in range(n):
            # Negate so the smallest values are the most similar rows.
            for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
                if j == i:
                    # A row is not counted as its own neighbor.
                    continue
                graph[i, j] += 1
                graph[j, i] += 1
        return sparse.csc_matrix(graph)
项目:semihin    作者:HKUST-KnowComp    | 项目源码 | 文件源码
def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    """Compute a Laplacian score of X_ent on a cosine neighbor graph of X_word.

    The graph links every row of X_word to its kNeighbors most similar rows,
    weighted by cosine similarity. The returned float is
    (f~' L f~) / (f~' D f~ + 1e-10), where D is the diagonal matrix of the
    graph's column sums, L = D - graph and f~ is X_ent minus its D-weighted
    mean.
    """
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                # A row is not linked to itself.
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    # The small constant keeps the denominator away from zero.
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:harpreif    作者:harpribot    | 项目源码 | 文件源码
def compute_nearest_neighbors(self, num_neighbors):
        """Collect the ``num_neighbors`` most similar images for every image.

        Returns a list with one entry per key of ``self.im2index``; each
        entry is ``[key, neighbor, ...]`` with the neighbors ordered by
        increasing similarity score. The list is also passed to
        ``NearestNeighbour.compute_neighbor_stats``.
        """
        result_list = []
        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for key, value in self.im2index.items():
            similarity_scores = self.similarity_mat[value]
            # Take the num_neighbors + 1 best scores and sort them, so that
            # the last entry really is the best match. argpartition alone
            # leaves that top group unordered, so slicing it with [:-1]
            # directly could drop an arbitrary neighbor instead.
            top = np.argpartition(similarity_scores, -(num_neighbors + 1))[-(num_neighbors + 1):]
            top = top[np.argsort(similarity_scores[top])]
            # removes best match as same as key
            ind = top[:-1]
            neighbors = [self.index2im[x] for x in ind]
            # NOTE(review): the lines that stored the neighbors were missing
            # from the listing; this layout is reconstructed from the
            # otherwise unused ``neighbor_list = [key]`` -- confirm.
            result_list.append([key] + neighbors)

        # compute neighbor statistics
        NearestNeighbour.compute_neighbor_stats(result_list, num_neighbors)

        # plot the TSNE plot
        # NOTE(review): the plotting call that followed this comment is
        # missing from the listing and has not been reconstructed.

        return result_list
项目:marvin    作者:aikanor    | 项目源码 | 文件源码
def _calculate_topk_ndces(self, k):
        """
        Calculate the indices of the k specialists with highest b-value, 
        including the base classifier regardless of its b-value. 

        Args:
            k: int >= 0, approximately specifying the number of derived specialists to select. 
                Precisely, the best k (by Wilson error bound) are taken, along with the 
                base classifier if it is not already one of the best k. 

        Returns:
            A list containing the indices of the top k classifiers. 
            The list always at least contains the base classifier's index (i.e. 0).
            Therefore, the list is of length k if the base classifier is one of the top k, 
            and length k+1 otherwise. If k is greater than the total number of derived 
            specialists, returns all of them.

        NOTE(review): the code below returns the intersection of the top-k
        indices with ``self._relevant_ndces`` and does not add index 0
        explicitly -- confirm that the description above still holds.
        """
        assert self.label_corrs is not None , "Label correlations must be calculated before top k indices."
        if k < len(self.label_corrs):
            topk_ndces = set(np.argpartition(-self.label_corrs, k)[:k])  #Only does a partial sort of b!
        else:
            # k covers everything: no partition needed (and kth would be out of range).
            topk_ndces = set(range(len(self.label_corrs)))
        return list(topk_ndces & set(self._relevant_ndces))
项目:nonce2vec    作者:minimalparts    | 项目源码 | 文件源码
def argsort(x, topn=None, reverse=False):
    """
    Return indices of the `topn` smallest elements in array `x`, in ascending order.

    If reverse is True, return the greatest elements instead, in descending order.

    With `topn` of None all indices are returned; a `topn` of zero or less
    returns an empty list.
    """
    x = np.asarray(x)  # unify code path for when `x` is not a np array (list, tuple...)
    if topn is None:
        topn = x.size
    if topn <= 0:
        return []
    if reverse:
        # Negating turns "largest first" into an ascending sort.
        x = -x
    if topn >= x.size or not hasattr(np, 'argpartition'):
        return np.argsort(x)[:topn]
    # np >= 1.8 has a fast partial argsort, use that!
    most_extreme = np.argpartition(x, topn)[:topn]
    return most_extreme.take(np.argsort(x.take(most_extreme)))  # resort topn into order
项目:aws-lambda-numpy    作者:vitolimandibhrata    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:DrQA_cn    作者:AmoseKang    | 项目源码 | 文件源码
def closest_docs(self, query, k=1):
        """Closest docs by dot product between query and documents
        in tfidf weighted word vector space.

        Returns a ``(doc_ids, doc_scores)`` pair holding at most ``k``
        entries, ordered by decreasing score.
        """
        spvec = self.text2spvec(query)
        res = spvec * self.doc_mat

        # Scores are negated so that ascending order means best first.
        if len(res.data) <= k:
            # Fewer stored scores than requested: rank all of them.
            o_sort = np.argsort(-res.data)
        else:
            # Select the k best without a full sort, then order just those.
            o = np.argpartition(-res.data, k)[0:k]
            o_sort = o[np.argsort(-res.data[o])]

        doc_scores = res.data[o_sort]
        doc_ids = [self.get_doc_id(i) for i in res.indices[o_sort]]
        return doc_ids, doc_scores
项目:pylmnn    作者:johny-c    | 项目源码 | 文件源码
def _select_target_neighbors(self):
        """Find the target neighbors of each sample, that stay fixed during training.

        Returns
        -------
        array_like
            An array of neighbors indices for each sample with shape (n_samples, n_neighbors).

        """
        import logging

        # NOTE(review): the logger object this message was sent to was
        # stripped from the listing; a module-level logger is assumed --
        # confirm against the class.
        logging.getLogger(__name__).info('Finding target neighbors...')
        target_neighbors = np.empty((self.X_.shape[0], self.n_neighbors_), dtype=int)
        for class_ in self.classes_:
            class_ind, = np.where(np.equal(self.y_, class_))
            dist = euclidean_distances(self.X_[class_ind], squared=True)
            # A sample must never be selected as its own neighbor.
            np.fill_diagonal(dist, np.inf)
            neigh_ind = np.argpartition(dist, self.n_neighbors_ - 1, axis=1)
            neigh_ind = neigh_ind[:, :self.n_neighbors_]
            # argpartition doesn't guarantee sorted order, so we sort again but only the k neighbors
            row_ind = np.arange(len(class_ind))[:, None]
            neigh_ind = neigh_ind[row_ind, np.argsort(dist[row_ind, neigh_ind])]
            # Map positions within the class back to indices into the full data set.
            target_neighbors[class_ind] = class_ind[neigh_ind]

        return target_neighbors
项目:hred-latent-piecewise    作者:julianser    | 项目源码 | 文件源码
def select_next_words(self, next_costs, next_probs, step_num, how_many):
        """Pick the ``how_many`` lowest-cost entries of ``next_costs``.

        Returns ``((row_indices, column_indices), costs)`` ordered by
        increasing cost. ``next_probs`` is accepted but not used.
        """
        # Pick only on the first line (for the beginning of sampling)
        # This will avoid duplicate <q> token.
        if step_num == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinite for finished utterances (they will be replaced)
            # by other utterances in the beam
            flat_next_costs = next_costs.flatten()

        args = numpy.argpartition(flat_next_costs, how_many)[:how_many]
        # argpartition leaves the selected entries unordered; sort just those.
        args = args[numpy.argsort(flat_next_costs[args])]

        return numpy.unravel_index(args, next_costs.shape), flat_next_costs[args]
项目:nmt    作者:Playinf    | 项目源码 | 文件源码
def find_nbest(score, n, threshold=None):
    """Select the ``n`` smallest entries of a 2-D score array.

    Returns ``(nbest_score, beam_indices, var_indices)``: the selected
    scores with their row and column indices in ``score``, in no particular
    order. When ``threshold`` is truthy, only entries whose score exceeds
    ``max(selected) + threshold`` are kept.
    """
    num_vars = score.shape[1]

    score = score.flatten()
    nbest = np.argpartition(score, n)[:n]

    # Floor division keeps the row indices integral; ``/`` would produce
    # floats on Python 3, which cannot be used as indices.
    beam_indices = nbest // num_vars
    var_indices = nbest % num_vars
    nbest_score = score[nbest]

    if threshold:
        best = np.max(nbest_score)
        cond = nbest_score > best + threshold
        nbest_score = nbest_score[cond]
        beam_indices = beam_indices[cond]
        var_indices = var_indices[cond]

    return nbest_score, beam_indices, var_indices
项目:ADEM    作者:mike-n-7    | 项目源码 | 文件源码
def tfidf_retrieval(tfidf_vec, train_contexts_txt, train_responses_txt, output_file):
    """Retrieve one response per context by tf-idf similarity and save them.

    For every row of the similarity matrix, the response belonging to the
    row with the second-highest score is selected (presumably because the
    highest score is the row itself -- TODO confirm). The selected responses
    are written to ``output_file``.
    """
    # Single-argument print() behaves the same on Python 2 and 3.
    print(type(tfidf_vec))
    tfidf_vec = tfidf_vec.toarray()
    print(tfidf_vec.shape)
    # NOTE(review): the head of this call was stripped from the listing;
    # np.dot(tfidf_vec, ...) is reconstructed from the remaining operand.
    prod_mat = np.dot(tfidf_vec, tfidf_vec.T)
    print(prod_mat.shape)
    prod_mat = prod_mat / mat_vector_2norm_squared(tfidf_vec)
    print(prod_mat.shape)

    response_list = []
    for i in range(len(prod_mat)):
        row = prod_mat[i]
        # Take the two highest scores, order them ascending and keep the
        # first one, i.e. the second-best match.
        ind = np.argpartition(row, -2)[-2:]
        ind = ind[np.argsort(row[ind])][0]
        # NOTE(review): this append was missing from the listing; it is
        # reconstructed from the response_list[i] lookup below.
        response_list.append(train_responses_txt[ind])
        print(train_contexts_txt[i])
        print(response_list[i])

    with open(output_file, 'w') as f1:
        for response in response_list:
            # NOTE(review): the loop body was missing from the listing;
            # one response per line is assumed -- confirm the format.
            f1.write(response + '\n')
项目:workspace    作者:nojima    | 项目源码 | 文件源码
def visualize_frequent_words(vectors_2d: np.ndarray, dataset: DataSet, k: int, ax: plt.Axes = None) -> None:
    """Scatter-plot and label the 2-D vectors of the k most frequent words.

    ``vectors_2d`` is indexed by word id. When ``ax`` is None a new
    13x13 figure is created; otherwise the plot is drawn on ``ax``.
    """
    # NOTE(review): the first argument of np.unique was stripped from the
    # listing; ``dataset.data`` is assumed -- confirm against DataSet.
    word_ids, counts = np.unique(dataset.data, return_counts=True)

    # Negate the counts so the k smallest are the k most frequent.
    indices = np.argpartition(-counts, k)[:k]
    frequent_word_ids = word_ids[indices]

    if ax is None:
        fig, ax = plt.subplots(figsize=(13, 13))
    else:
        # The caller owns the figure; nothing to show here.
        fig = None

    vectors_2d = vectors_2d[frequent_word_ids]

    ax.scatter(vectors_2d[:, 0], vectors_2d[:, 1], s=2, alpha=0.25)
    for i, word_id in enumerate(frequent_word_ids):
        ax.annotate(dataset.vocabulary.to_word(word_id), (vectors_2d[i, 0], vectors_2d[i, 1]))

    if fig is not None:
        # NOTE(review): the body of this branch was missing from the
        # listing; showing the newly created figure is assumed -- confirm.
        fig.show()
项目:lambda-numba    作者:rlhotovy    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:KanervaCoding    作者:JadenTravnik    | 项目源码 | 文件源码
def GetFeatures(self, data):
        """Return the indices of the prototypes selected for ``data``.

        ``data`` is scaled by ``self.stateScale`` and shifted by
        ``self.bias``; the ``self.c[0]`` entries of ``self.layers[0]`` with
        the smallest Euclidean distance to it are selected. Every further
        layer selects ``self.c[i]`` indices from the previous selection.
        """
        D = self.layers[0] - (np.array(data)*self.stateScale + self.bias)
        # Same reduction as the built-in sum(D.T**2), done inside numpy
        # instead of a Python-level loop.
        D = np.sqrt(np.sum(D.T**2, axis=0))
        indexes = np.argpartition(D, self.c[0], axis=0)[:self.c[0]]

        for i in range(1,len(self.layers)):
            # NOTE(review): np.setxor1d returns a 1-D array, so summing it
            # over axis=1 looks like it cannot work. The original carried a
            # commented-out alternative based on np.logical_xor against an
            # indicator vector -- confirm which one is intended.
            D = np.sum(np.setxor1d(self.layers[i], indexes, True), axis=1)
            indexes = np.argpartition(D, self.c[i], axis=0)[:self.c[i]]

        return indexes
项目:SerpentAI    作者:SerpentAI    | 项目源码 | 文件源码
def process_frame_for_game_play(frame):
    """Assumes a grayscale frame

    Zeroes, in place, the pixels equal to two of the three most populated
    histogram positions of ``frame[40:]``, then returns a boolean frame
    thresholded with Otsu's method. Returns None when the histogram has
    fewer than three distinct counts.
    """
    counts = skimage.exposure.histogram(frame[40:])[0]

    if np.unique(counts).size < 3:
        return None

    # Positions of the three largest counts, in no particular order.
    top_three = np.argpartition(counts, -3)[-3:]

    # Of those three, blank out the two with the lowest position.
    for level in sorted(top_three)[:2]:
        frame[frame == level] = 0

    return frame > skimage.filters.threshold_otsu(frame[40:])
项目:deliver    作者:orchestor    | 项目源码 | 文件源码
def test_partition_cdtype(self):
        d = np.array([('Galahad', 1.7, 38), ('Arthur', 1.8, 41),
                   ('Lancelot', 1.9, 38)],
                  dtype=[('name', '|S10'), ('height', '<f8'), ('age', '<i4')])

        tgt = np.sort(d, order=['age', 'height'])
        assert_array_equal(np.partition(d, range(d.size),
                                        order=['age', 'height']),
        assert_array_equal(d[np.argpartition(d, range(d.size),
                                             order=['age', 'height'])],
        for k in range(d.size):
            assert_equal(np.partition(d, k, order=['age', 'height'])[k],
            assert_equal(d[np.argpartition(d, k, order=['age', 'height'])][k],

        d = np.array(['Galahad', 'Arthur', 'zebra', 'Lancelot'])
        tgt = np.sort(d)
        assert_array_equal(np.partition(d, range(d.size)), tgt)
        for k in range(d.size):
            assert_equal(np.partition(d, k)[k], tgt[k])
            assert_equal(d[np.argpartition(d, k)][k], tgt[k])
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one ``id,class score class score ...`` line per video.

  For each video the ``top_k`` best-scoring classes are listed in order of
  decreasing score; the id is decoded from UTF-8 bytes.
  """
  for row, raw_id in enumerate(video_ids):
    scores = predictions[row]
    # Indices of the top_k largest scores, in no particular order.
    best = numpy.argpartition(scores, -top_k)[-top_k:]
    ranked = sorted(((class_id, scores[class_id]) for class_id in best),
                    key=lambda item: -item[1])
    body = " ".join("%i %f" % item for item in ranked)
    yield raw_id.decode('utf-8') + "," + body + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
      An empty batch yields 0.0.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  if num_videos == 0:
    # Nothing to average; avoids dividing by zero below.
    return aggregated_precision
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    # Indices of the num_labels highest predictions for this row.
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      # Only strictly positive predictions count as retrieved.
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def top_k_triplets(predictions, labels, k=20):
  """Get the top_k for a 1-d numpy array. Returns a sparse list of tuples in
  (class index, prediction, label) format, in no particular order."""
  m = len(predictions)
  k = min(k, m)
  if k <= 0:
    # ``[-0:]`` would select every element, so handle "nothing" explicitly.
    return []
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one ``id,class score class score ...`` line per video.

  For each video the ``top_k`` best-scoring classes are listed in order of
  decreasing score; the id is decoded from UTF-8 bytes.
  """
  for row, raw_id in enumerate(video_ids):
    scores = predictions[row]
    # Indices of the top_k largest scores, in no particular order.
    best = numpy.argpartition(scores, -top_k)[-top_k:]
    ranked = sorted(((class_id, scores[class_id]) for class_id in best),
                    key=lambda item: -item[1])
    body = " ".join("%i %f" % item for item in ranked)
    yield raw_id.decode('utf-8') + "," + body + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one ``id,class score class score ...`` line per video.

  For each video the ``top_k`` best-scoring classes are listed in order of
  decreasing score; the id is decoded from UTF-8 bytes.
  """
  for row, raw_id in enumerate(video_ids):
    scores = predictions[row]
    # Indices of the top_k largest scores, in no particular order.
    best = numpy.argpartition(scores, -top_k)[-top_k:]
    ranked = sorted(((class_id, scores[class_id]) for class_id in best),
                    key=lambda item: -item[1])
    body = " ".join("%i %f" % item for item in ranked)
    yield raw_id.decode('utf-8') + "," + body + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  For each row, the top-n predictions are taken, where n is the number of
  ground-truth labels in that row; the row's precision is the fraction of
  those n entries that have a positive score and a positive label.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    if num_labels == 0:
      # No ground-truth labels: the row contributes zero precision. Skipping
      # also avoids the [-0:] slice, which would select every class.
      continue
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      # Only predictions with a positive score count as retrieved.
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def top_k_triplets(predictions, labels, k=20):
  """Select the k highest-scoring entries of a 1-d numpy array.

  Args:
    predictions: 1-d numpy array of scores, indexed by class.
    labels: sequence parallel to `predictions`, indexed by class.
    k: maximum number of entries to return; clamped to len(predictions).

  Returns:
    A list of (class_index, prediction, label) tuples for the k largest
    predictions, in no particular order. The list is empty when k <= 0 or
    when `predictions` is empty.
  """
  k = min(k, len(predictions))
  if k <= 0:
    # argpartition(..., -0)[-0:] would select every index rather than none,
    # and an empty input would make argpartition raise.
    return []
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one CSV line of top-k class predictions per video.

  Args:
    video_ids: sequence of byte strings; each one is decoded as UTF-8.
    predictions: per-video score rows, indexed as
      predictions[video_index][class_index].
    top_k: maximum number of classes to report per video; clamped to the
      number of scores in the row.

  Yields:
    "<video_id>,<class> <score> <class> <score> ...\n" with the pairs in
    descending score order.
  """
  for video_index, video_id in enumerate(video_ids):
    scores = predictions[video_index]
    # Clamp so a short row does not make argpartition raise, and so that
    # top_k <= 0 selects nothing (a bare [-0:] slice would select everything).
    k = max(0, min(top_k, len(scores)))
    if k:
      top_indices = numpy.argpartition(scores, -k)[-k:]
    else:
      top_indices = []
    line = sorted(((class_index, scores[class_index])
                   for class_index in top_indices),
                  key=lambda p: -p[1])
    yield video_id.decode('utf-8') + "," + " ".join(
        "%i %f" % pair for pair in line) + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one CSV line of top-k class predictions per video.

  Args:
    video_ids: sequence of byte strings; each one is decoded as UTF-8.
    predictions: per-video score rows, indexed as
      predictions[video_index][class_index].
    top_k: maximum number of classes to report per video; clamped to the
      number of scores in the row.

  Yields:
    "<video_id>,<class> <score> <class> <score> ...\n" with the pairs in
    descending score order.
  """
  for video_index, video_id in enumerate(video_ids):
    scores = predictions[video_index]
    # Clamp so a short row does not make argpartition raise, and so that
    # top_k <= 0 selects nothing (a bare [-0:] slice would select everything).
    k = max(0, min(top_k, len(scores)))
    if k:
      top_indices = numpy.argpartition(scores, -k)[-k:]
    else:
      top_indices = []
    line = sorted(((class_index, scores[class_index])
                   for class_index in top_indices),
                  key=lambda p: -p[1])
    yield video_id.decode('utf-8') + "," + " ".join(
        "%i %f" % pair for pair in line) + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one CSV line of top-k class predictions per video.

  Args:
    video_ids: sequence of byte strings; each one is decoded as UTF-8.
    predictions: per-video score rows, indexed as
      predictions[video_index][class_index].
    top_k: maximum number of classes to report per video; clamped to the
      number of scores in the row.

  Yields:
    "<video_id>,<class> <score> <class> <score> ...\n" with the pairs in
    descending score order.
  """
  for video_index, video_id in enumerate(video_ids):
    scores = predictions[video_index]
    # Clamp so a short row does not make argpartition raise, and so that
    # top_k <= 0 selects nothing (a bare [-0:] slice would select everything).
    k = max(0, min(top_k, len(scores)))
    if k:
      top_indices = numpy.argpartition(scores, -k)[-k:]
    else:
      top_indices = []
    line = sorted(((class_index, scores[class_index])
                   for class_index in top_indices),
                  key=lambda p: -p[1])
    yield video_id.decode('utf-8') + "," + " ".join(
        "%i %f" % pair for pair in line) + "\n"
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def calculate_precision_at_equal_recall_rate(predictions, actuals):
  """Performs a local (numpy) calculation of the PERR.

  For each row, the top-n predictions are taken, where n is the number of
  ground-truth labels in that row; the row's precision is the fraction of
  those n entries that have a positive score and a positive label.

  Args:
    predictions: Matrix containing the outputs of the model.
      Dimensions are 'batch' x 'num_classes'.
    actuals: Matrix containing the ground truth labels.
      Dimensions are 'batch' x 'num_classes'.

  Returns:
    float: The average precision at equal recall rate across the entire batch.
  """
  aggregated_precision = 0.0
  num_videos = actuals.shape[0]
  for row in numpy.arange(num_videos):
    num_labels = int(numpy.sum(actuals[row]))
    if num_labels == 0:
      # No ground-truth labels: the row contributes zero precision. Skipping
      # also avoids the [-0:] slice, which would select every class.
      continue
    top_indices = numpy.argpartition(predictions[row],
                                     -num_labels)[-num_labels:]
    item_precision = 0.0
    for label_index in top_indices:
      # Only predictions with a positive score count as retrieved.
      if predictions[row][label_index] > 0:
        item_precision += actuals[row][label_index]
    item_precision /= top_indices.size
    aggregated_precision += item_precision
  aggregated_precision /= num_videos
  return aggregated_precision
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def top_k_triplets(predictions, labels, k=20):
  """Select the k highest-scoring entries of a 1-d numpy array.

  Args:
    predictions: 1-d numpy array of scores, indexed by class.
    labels: sequence parallel to `predictions`, indexed by class.
    k: maximum number of entries to return; clamped to len(predictions).

  Returns:
    A list of (class_index, prediction, label) tuples for the k largest
    predictions, in no particular order. The list is empty when k <= 0 or
    when `predictions` is empty.
  """
  k = min(k, len(predictions))
  if k <= 0:
    # argpartition(..., -0)[-0:] would select every index rather than none,
    # and an empty input would make argpartition raise.
    return []
  indices = numpy.argpartition(predictions, -k)[-k:]
  return [(index, predictions[index], labels[index]) for index in indices]
项目:youtube-8m    作者:wangheda    | 项目源码 | 文件源码
def format_lines(video_ids, predictions, top_k):
  """Yield one CSV line of top-k class predictions per video.

  Args:
    video_ids: sequence of byte strings; each one is decoded as UTF-8.
    predictions: per-video score rows, indexed as
      predictions[video_index][class_index].
    top_k: maximum number of classes to report per video; clamped to the
      number of scores in the row.

  Yields:
    "<video_id>,<class> <score> <class> <score> ...\n" with the pairs in
    descending score order.
  """
  for video_index, video_id in enumerate(video_ids):
    scores = predictions[video_index]
    # Clamp so a short row does not make argpartition raise, and so that
    # top_k <= 0 selects nothing (a bare [-0:] slice would select everything).
    k = max(0, min(top_k, len(scores)))
    if k:
      top_indices = numpy.argpartition(scores, -k)[-k:]
    else:
      top_indices = []
    line = sorted(((class_index, scores[class_index])
                   for class_index in top_indices),
                  key=lambda p: -p[1])
    yield video_id.decode('utf-8') + "," + " ".join(
        "%i %f" % pair for pair in line) + "\n"
项目:wmd-relax    作者:src-d    | 项目源码 | 文件源码
def __call__(self, words, weights, vocabulary_max):
        """Shrink a weighted vocabulary to at most `vocabulary_max` entries.

        :param words: sequence of vocabulary items; converted to a numpy \
                      array when trimming is needed.
        :param weights: numpy array of weights parallel to `words`.
        :param vocabulary_max: upper bound on the returned vocabulary size.
        :return: tuple (words, weights). The inputs are returned untouched \
                 when len(words) is below vocabulary_max * trigger_ratio. \
                 For more than 2 * vocabulary_max words, the \
                 `vocabulary_max` heaviest entries are returned in no \
                 particular order. Otherwise the entries are sorted by \
                 descending weight, the trailing run that departs from the \
                 fitted exponential trend is dropped, and the result is cut \
                 to `vocabulary_max`.
        """
        if len(words) < vocabulary_max * self.trigger_ratio:
            return words, weights

        if not isinstance(words, numpy.ndarray):
            words = numpy.array(words)

        # Tail optimization does not help with very large vocabularies
        if len(words) > vocabulary_max * 2:
            indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
            indices = indices[-vocabulary_max:]
            words = words[indices]
            weights = weights[indices]
            return words, weights

        # Vocabulary typically consists of these three parts:
        # 1) the core - we found it's border - `core_end` - 15%
        # 2) the body - 70%
        # 3) the minor tail - 15%
        # (1) and (3) are roughly the same size
        # (3) can be safely discarded, (2) can be discarded with care,
        # (1) shall never be discarded.

        sorter = numpy.argsort(weights)[::-1]
        weights = weights[sorter]
        trend_start = int(len(weights) * 0.2)
        trend_finish = int(len(weights) * 0.8)
        # Fit a straight line to log(weights) over the middle of the sorted
        # distribution, i.e. an exponential trend weights ~ exp(z[1] + z[0] * i).
        z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
                          numpy.log(weights[trend_start:trend_finish]),
                          1)
        exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
        # Mean absolute deviation from the trend inside the fitted range.
        avg_error = numpy.abs(weights[trend_start:trend_finish] -
                              exp_z[trend_start:trend_finish]).mean()
        # Number of trailing entries that deviate from the trend by at least
        # avg_error: position of the first "fits the trend" flag from the end.
        tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
        # Slice with an explicit end index: `[:-tail_size]` would yield an
        # empty result when tail_size is 0.
        keep = len(weights) - tail_size
        weights = weights[:keep][:vocabulary_max]
        words = words[sorter[:keep]][:vocabulary_max]

        return words, weights