Python scipy.sparse module: dok_matrix() usage examples

The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.sparse.dok_matrix().
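
Before the project excerpts, here is a minimal, self-contained sketch of the typical dok_matrix workflow (incremental construction, then conversion to CSR for computation); it assumes nothing beyond NumPy and SciPy:

import numpy as np
from scipy.sparse import dok_matrix

# DOK (Dictionary Of Keys) stores entries in a hash map, so assigning
# individual cells is cheap and only nonzero entries consume memory.
m = dok_matrix((3, 4), dtype=np.float32)
m[0, 1] = 1.0
m[2, 3] += 2.5          # in-place updates of single cells are supported

# Convert to CSR before arithmetic or matrix products: DOK is for
# construction, CSR is for computation.
csr = m.tocsr()
print(csr.toarray())

Most of the excerpts below follow exactly this pattern: fill a dok_matrix cell by cell, then call tocsr() (or mmwrite, or toarray) once construction is done.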

Project: deep_architect | Author: negrinho
def _compute_features(self, model):

        bls = [ b[0] for b in tuple(model.repr_model()) ]

        nfeats_other = 1
        nfeats_ngrams = len(self.module_ngram_to_id)
        nfeats = nfeats_other + nfeats_ngrams
        feats = sp.dok_matrix((1, nfeats), dtype=np.float32)

        # other features
        feats[0, 0] = len(bls)

        # ngrams features
        for k in range(1, self.ngram_maxlen):
            for i in range(len(bls) - k):
                ngram = tuple(bls[i:i + k])

                if ngram in self.module_ngram_to_id:
                    ngram_i = self.module_ngram_to_id[ngram]
                    feats_i = nfeats_other + ngram_i

                    feats[0, feats_i] += 1.0

        return sp.csr_matrix(feats)
Project: corporadb | Author: nlesc-sherlock
def buildMatrix(inputDict, inputFolder, outputMatrix):
    wordDict = gensim.corpora.Dictionary.load(inputDict)
    wordDict.filter_extremes()

    docs = glob(inputFolder + '/**/*.')
    nDocs = len(docs)
    nWords = len(wordDict)

    sp = sparse.dok_matrix((nWords, nDocs))
    for docId, doc in enumerate(docs):
        docTokens = loadTokens(doc)
        for wordIdx, wordCount in wordDict.doc2bow(docTokens):
            sp[wordIdx, docId] = wordCount
    print('Words,Documents: ', (nWords, nDocs))
    mmwrite(outputMatrix, sp)

# Main script
Project: ldpop | Author: popgenmethods
def get_pi_c(self, popSize, theta, rho):
        if not self.exact:
            return numpy.array([0.0] * self.n + [1.0])
        n = self.n
        coalRate = 1. / popSize
        recomRate = float(rho) / 2.

        if rho == 0.0:
            return numpy.array([0.0] * self.n + [1.0])
        else:        
            numCoupledLinsRates = sparse.dok_matrix((n+1, n+1))
            for i in range(n+1):
                if i < n:
                    numCoupledLinsRates[i,i+1] = ((n-i)**2) * coalRate
                    numCoupledLinsRates[i,i] -= numCoupledLinsRates[i,i+1]
                if i > 0:
                    numCoupledLinsRates[i,i-1] = recomRate * i
                    numCoupledLinsRates[i,i] -= numCoupledLinsRates[i,i-1]
            return stationary1d_tridiagonal(numCoupledLinsRates)
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def _init_model(self):
        self.user_num, self.item_num = self.train_matrix.shape
        self.rating_mean = np.mean(list(self.train_matrix.values()))
        self.predictions = dok_matrix((self.user_num, self.item_num))

        if self.config_handler['Output', 'is_load', 'bool']:
            self._load_model()
            assert(self.user_factors.shape[1] == self.item_factors.shape[1])
            self.factor_num = self.user_factors.shape[1]
        else:
            self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
            self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num)) * 0.1
            self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num)) * 0.1

            # Other Parameters
            self.learn_rate = self.config_handler['Parameters', 'learn_rate', 'float']
            self.momentum = self.config_handler['Parameters', 'momentum', 'float']
            self.user_lambda = self.config_handler['Parameters', 'user_lambda', 'float']
            self.item_lambda = self.config_handler['Parameters', 'item_lambda', 'float']

        # Momentum for update factors
        self.user_factors_inc = np.zeros((self.user_num, self.factor_num))
        self.item_factors_inc = np.zeros((self.item_num, self.factor_num))
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def read_data(self, filename):
        """
        read raw dataset, and convert to sparse matrix format.
        :param filename:
        """
        users, items = set(), set()
        ratings = list()
        with codecs.open(filename, mode="r", encoding="utf-8") as read_file:
            for line in read_file:
                user_item_rating = re.split('\t|,|::', line.strip())
                user_id = int(user_item_rating[0])
                item_id = int(user_item_rating[1])
                rating = int(user_item_rating[2])
                users.add(user_id)
                items.add(item_id)
                ratings.append((user_id, item_id, rating))

        # Convert
        user_num, item_num = len(users), len(items)
        users_dict = {user_id: index for index, user_id in enumerate(list(users))}
        items_dict = {item_id: index for index, item_id in enumerate(list(items))}
        data_model = dok_matrix((user_num, item_num))
        for user_id, item_id, rating in ratings:
            data_model[users_dict[user_id], items_dict[item_id]] = rating
        return data_model
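
Since read_data collects the ratings as (user, item, value) triplets anyway, the same matrix can be built in one shot through coo_matrix; a minimal sketch reusing the names from the function above (note one behavioral difference: COO sums duplicate entries on conversion, while repeated dok assignment overwrites):

from scipy.sparse import coo_matrix

rows = [users_dict[u] for u, i, r in ratings]
cols = [items_dict[i] for u, i, r in ratings]
vals = [r for u, i, r in ratings]
data_model = coo_matrix((vals, (rows, cols)), shape=(user_num, item_num)).todok()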
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        '''Initialize model dimensions and read the AspectModel hyper-parameters.'''
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('AspectModel', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('AspectModel', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('AspectModel', 'threshold'))

        self.X = np.random.uniform(0, 1, size=(self.numUsers, self.numFactors))      #  P(x|z)
        self.X = normalize(self.X)

        self.Y = np.random.uniform(0, 1, size=(self.numItems, self.numFactors))      #  P(y|z)
        self.Y = normalize(self.Y)

        self.Z = np.random.uniform(0, 1, size=self.numFactors)                       #  P(z)
        self.Z = normalize(self.Z)

        self.Q = np.zeros((self.numUsers, self.numFactors, self.numItems))   # P(z|x,y)
Project: multilingual-joint-embeddings | Author: dcferreira
def load_data(self, filepath, V):
        f = open(filepath)
        S = []
        for line in f:
            s = {}
            words = line.rstrip('\n').split(' ')
            for word in words:
                if word in V:
                    wid = V[word]
                    if wid in s:
                        s[wid] += 1
                    else:
                        s[wid] = 1
            S.append(s)
        f.close()

        # Transform to dok and to csr.
        Sdok = dok_matrix((len(S), len(V)), dtype=int)
        for n, s in enumerate(S):
            for wid in s:
                Sdok[n, wid] = s[wid]
        S = Sdok.tocsr()
        return S
Project: Quadflor | Author: quadflor
def predict(self, X):

        predictions = dok_matrix((X.shape[0], self.y.shape[1]), dtype=int)

        distances = self.base_classifier.predict_proba(X)
        topNIndices, topNDistances = self._get_top_labels(distances)

        for entry, (label_list, dist_list) in enumerate(zip(topNIndices, topNDistances)):
            for rank, label in enumerate(label_list):
                if not self.dependencies:
                    training_sample = [[rank, dist_list[rank]]]
                else:
                    training_sample = [distances[entry, :]]
                if label in self.meta_classifiers:
                    prediction = self.meta_classifiers[label].predict(training_sample)[0]
                    if prediction == 1:
                        predictions[entry, label] = 1

        return csr_matrix(predictions)
Project: Quadflor | Author: quadflor
def _a(self, neighbor_ids):
        result = sp.csr_matrix((0, self.y.shape[1]))
        for ns in neighbor_ids:
            neighbor_labels = self.y[ns]
            # By squeezing we support matrix output from scipy.sparse.sum and 1D array from np.sum
            labels_sum = np.squeeze(np.array(neighbor_labels.sum(0)))
            predicted_labels = sp.csr_matrix([np.floor(np.divide(labels_sum, len(ns)) + (1 - self.threshold))])
            # If there are no labels, we take the most frequent label.
            if predicted_labels.sum() == 0:
                divide = np.divide(labels_sum, len(ns))
                max_label = divide.argmax()
                predicted_labels = sp.dok_matrix((1, predicted_labels.shape[1]))
                predicted_labels[0, max_label] = 1
                predicted_labels = sp.csr_matrix(predicted_labels)

            result = sp.vstack((result, predicted_labels))
        return result
Project: Quadflor | Author: quadflor
def _b(self, neighbor_ids):
        result = sp.csr_matrix((0, self.y.shape[1]))
        for ns in neighbor_ids:
            average_label_nums = int(np.floor(np.mean([self.y[n].sum() for n in ns])))
            neighbor_labels = self.y[ns]
            labels_sum = np.array(neighbor_labels.sum(0))
            # By squeezing we support matrix output from scipy.sparse.sum and 1D array from np.sum
            divide = np.squeeze(np.divide(labels_sum, len(ns)))
            predicted_indices = np.argsort(divide)[-average_label_nums:]
            predicted_labels = sp.dok_matrix((1, len(divide)))
            # noinspection PyTypeChecker
            for index in predicted_indices:
                predicted_labels[0, index] = 1
            predicted_labels = sp.csr_matrix(predicted_labels)
            result = sp.vstack((result, predicted_labels))
        return result
Project: moviegeek | Author: practical-recommender-systems
def load_data(self):
        print('loading data')
        user_ids = list(
            Rating.objects.values('user_id')
                .annotate(movie_count=Count('movie_id'))
                .order_by('-movie_count'))
        content_ids = list(Rating.objects.values('movie_id').distinct())
        content_map = {content_ids[i]['movie_id']: i
                       for i in range(len(content_ids))}
        num_users = len(user_ids)
        user_ratings = dok_matrix((num_users,
                                   len(content_ids)),
                                  dtype=np.float32)
        for i in range(num_users):
            # each user corresponds to a row, in the order of all_user_names
            ratings = Rating.objects.filter(user_id=user_ids[i]['user_id'])
            for user_rating in ratings:
                user_ratings[i, content_map[user_rating.movie_id]] = user_rating.rating
        print('data loaded')

        return user_ids, user_ratings
Project: topicsketch | Author: linegroup
def infer_unit(self, _h):
        k = _NUM_TOPICS
        n = _SKETCH_BUCKET_SIZE

        m2 = dok_matrix((n, n), dtype=np.float64)
        m3 = dok_matrix((n, n), dtype=np.float64)

        container = self.sketch_m2[_h]
        for key, value in container.container.items():
            i, j = self._inverse_index(key)

            m2[i,j] = value.get(self.timestamp)[2]
            if i != j:
                m2[j,i] = m2[i,j]

        container = self.sketch_m3[_h]
        for key, value in container.container.items():
            i, j = self._inverse_index(key)

            m3[i,j] = value.get(self.timestamp)[2]
            if i != j:
                m3[j,i] = m3[i,j]

        return solver.solve(csr_matrix(m2), csr_matrix(m3), n, k)
Project: prep | Author: ysyushi
def update_theta(self):
        # compute w_over_tau_mu
        w_over_tau_mu = sp.dok_matrix(self.w)
        for ((s, t), value) in w_over_tau_mu.items():
            w_over_tau_mu[(s, t)] = 1.*self.w[(s, t)] / (self.tau[s] * self.mu[t])
        w_over_tau_mu = w_over_tau_mu.toarray()

        # objective function w.r.t. theta
        def cur_obj(x): return self.update_theta_obj(x, self.phi, w_over_tau_mu)

        # Jacobian w.r.t. theta
        def cur_jac(x): return self.update_theta_jac(x, self.phi, w_over_tau_mu)

        # optimize
        self.theta, _ = gradient_descent(self.theta, cur_obj, cur_jac, self.eta, self.delta_D*2., step_limit=1000, step_len_init=0.00001)

        # update phi*theta once
        self.update_phi_times_theta()
Project: VEP_TMScripts | Author: uwgraphics
def bow2matrix(bow, numDocs, numWords):
    s = dok_matrix((numWords, numDocs))
    for docNum in range(len(bow)):
        for wordId, count in bow[docNum]:
            s[wordId, docNum] = count
    return s
Project: histwords | Author: williamleif
def normalize(self):
        m2 = self.m.copy()
        m2.data **= 2
        norm = np.reciprocal(np.sqrt(np.array(m2.sum(axis=1))[:, 0]))
        normalizer = dok_matrix((len(norm), len(norm)))
        normalizer.setdiag(norm)
        self.m = normalizer.tocsr().dot(self.m)
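
The diagonal-scaler idiom above also appears in multiply_by_rows and multiply_by_columns below; scipy.sparse.diags builds the same diagonal matrix directly and skips the intermediate dok_matrix. A minimal sketch, not taken from the project:

from scipy.sparse import diags, random as sparse_random

m = sparse_random(5, 3, density=0.4, format='csr')
row_coefs = 1.0 / (abs(m).sum(axis=1).A.ravel() + 1e-12)  # guard against empty rows
m_scaled = diags(row_coefs).dot(m)   # same effect as the dok/setdiag route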
Project: histwords | Author: williamleif
def read_counts_matrix(counts_path):
    """
    Reads the counts into a sparse matrix (CSR) from the count-word-context textual format.
    """
    words = load_count_vocabulary(counts_path + '.words.vocab')
    contexts = load_count_vocabulary(counts_path + '.contexts.vocab')
    words = list(words.keys())
    contexts = list(contexts.keys())
    iw = sorted(words)
    ic = sorted(contexts)
    wi = dict([(w, i) for i, w in enumerate(iw)])
    ci = dict([(c, i) for i, c in enumerate(ic)])

    counts = csr_matrix((len(wi), len(ci)), dtype=np.float32)
    tmp_counts = dok_matrix((len(wi), len(ci)), dtype=np.float32)
    update_threshold = 100000
    i = 0
    with open(counts_path) as f:
        for line in f:
            count, word, context = line.strip().split()
            if word in wi and context in ci:
                tmp_counts[wi[word], ci[context]] = int(count)
            i += 1
            if i == update_threshold:
                counts = counts + tmp_counts.tocsr()
                tmp_counts = dok_matrix((len(wi), len(ci)), dtype=np.float32)
                i = 0
    counts = counts + tmp_counts.tocsr()

    return counts, iw, ic
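
A note on the flush pattern above: per-entry dict storage makes a large dok_matrix memory-hungry, so the loop batches assignments into a small temporary dok and folds it into the running CSR total every 100,000 input lines, trading a little conversion time for a bounded memory footprint.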
Project: histwords | Author: williamleif
def multiply_by_rows(matrix, row_coefs):
    normalizer = dok_matrix((len(row_coefs), len(row_coefs)))
    normalizer.setdiag(row_coefs)
    return normalizer.tocsr().dot(matrix)
Project: histwords | Author: williamleif
def multiply_by_columns(matrix, col_coefs):
    normalizer = dok_matrix((len(col_coefs), len(col_coefs)))
    normalizer.setdiag(col_coefs)
    return matrix.dot(normalizer.tocsr())
Project: CollMetric | Author: changun
def citeulike(tag_occurence_thres=10):
    user_dict = defaultdict(set)
    for u, item_list in enumerate(open("citeulike-t/users.dat").readlines()):
        items = item_list.strip().split(" ")
        # ignore the first element in each line, which is the number of items the user liked. 
        for item in items[1:]:
            user_dict[u].add(int(item))

    n_users = len(user_dict)
    n_items = max([item for items in user_dict.values() for item in items]) + 1

    user_item_matrix = dok_matrix((n_users, n_items), dtype=np.int32)
    for u, item_list in enumerate(open("citeulike-t/users.dat").readlines()):
        items = item_list.strip().split(" ")
        # ignore the first element in each line, which is the number of items the user liked. 
        for item in items[1:]:
            user_item_matrix[u, int(item)] = 1

    n_features = 0
    for l in open("citeulike-t/tag-item.dat").readlines():
        items = l.strip().split(" ")
        if len(items) >= tag_occurence_thres:
            n_features += 1
    print("{} features over tag_occurence_thres ({})".format(n_features, tag_occurence_thres))
    features = dok_matrix((n_items, n_features), dtype=np.int32)
    feature_index = 0
    for l in open("citeulike-t/tag-item.dat").readlines():
        items = l.strip().split(" ")
        if len(items) >= tag_occurence_thres:
            features[[int(i) for i in items], feature_index] = 1
            feature_index += 1

    return user_item_matrix, features
Project: CollMetric | Author: changun
def split_data(user_item_matrix, split_ratio=(3, 1, 1), seed=1):
    # set the seed to have deterministic results
    np.random.seed(seed)
    train = dok_matrix(user_item_matrix.shape)
    validation = dok_matrix(user_item_matrix.shape)
    test = dok_matrix(user_item_matrix.shape)
    # convert it to lil format for fast row access
    user_item_matrix = lil_matrix(user_item_matrix)
    for user in tqdm(range(user_item_matrix.shape[0]), desc="Split data into train/valid/test"):
        items = list(user_item_matrix.rows[user])
        if len(items) >= 5:

            np.random.shuffle(items)

            train_count = int(len(items) * split_ratio[0] / sum(split_ratio))
            valid_count = int(len(items) * split_ratio[1] / sum(split_ratio))

            for i in items[0: train_count]:
                train[user, i] = 1
            for i in items[train_count: train_count + valid_count]:
                validation[user, i] = 1
            for i in items[train_count + valid_count:]:
                test[user, i] = 1
    print("{}/{}/{} train/valid/test samples".format(
        len(train.nonzero()[0]),
        len(validation.nonzero()[0]),
        len(test.nonzero()[0])))
    return train, validation, test
Project: cobrame | Author: SBRG
def construct_s_matrix(self, growth_rate):
        """build the stoichiometric matrix at a specific growth rate"""
        # initialize to 0
        s = dok_matrix((len(self.metabolites), len(self.reactions)))
        # populate with stoichiometry
        for i, r in enumerate(self.reactions):
            for met, value in iteritems(r._metabolites):
                met_index = self.metabolites.index(met)
                if hasattr(value, "subs"):
                    s[met_index, i] = float(value.subs(mu, growth_rate))
                else:
                    s[met_index, i] = float(value)
        return s
Project: lazyarray | Author: NeuralEnsemble
def sparse_dok_matrices():
    # The original referenced undefined names (i, j, k, ..., r); concrete
    # sample values are substituted here so the snippet runs as shown.
    dok = sparse.dok_matrix([[1., 0., 2.], [0., 3., 0.], [4., 0., 5.]])
    #print("dok matrices =")
    #print(dok)
    return dok
Project: ldpop | Author: popgenmethods
def one_locus_probs(popSize, theta, n):
    coalRate = 1. / popSize
    mutRate = float(theta) / 2.

    numOnesRates = sparse.dok_matrix((n+1,n+1))
    for i in range(n+1):
        if i < n:
            numOnesRates[i,i+1] = (n-i) * mutRate + i * (n-i) / 2.0 * coalRate
            numOnesRates[i,i] -= numOnesRates[i,i+1]
        if i > 0:
            numOnesRates[i,i-1] = i * mutRate + i * (n-i) / 2.0 * coalRate
            numOnesRates[i,i] -= numOnesRates[i,i-1]

    return stationary1d_tridiagonal(numOnesRates)
Project: ldpop | Author: popgenmethods
def build_symmetries(self):
        start = time.time()

        # the index of the folded version in all_configs
        folded_list = get_folded_config_idxs(self)

        # foldedIdx = the index in folded_configs, allIdx = the index in all_configs
        foldedIdx_to_allIdx = numpy.array(list(set(folded_list)))

        allIdx_to_foldedIdx = {v:k for k,v in enumerate(foldedIdx_to_allIdx)}       
        allIdx_to_foldedIdx = [allIdx_to_foldedIdx[x] for x in folded_list]

        self.hash_to_foldedIdx = {k: allIdx_to_foldedIdx[v] for k,v in self.hash_to_allIdx.items()}
        self.folded_config_array = self.config_array[foldedIdx_to_allIdx,:,:]        

        self.numC = self.folded_config_array[:,0,0] + self.folded_config_array[:,0,1] + self.folded_config_array[:,1,0] + self.folded_config_array[:,1,1]

        symm_mat = sparse.dok_matrix((len(allIdx_to_foldedIdx), self.folded_config_array.shape[0]))
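        # NOTE: dict-style update() is disabled on dok_matrix in newer SciPy
        # releases; portable code should assign these entries key by key.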
        symm_mat.update(dict(zip(enumerate(allIdx_to_foldedIdx), [1]*len(folded_list))))
        symm_mat = symm_mat.tocsc()

        antisymm_mat = symm_mat.transpose().tocsr(copy=True)
        # normalize rows
        self.n_unfolded_versions = numpy.array(antisymm_mat.sum(axis=1))[:,0]
        row_indices, col_indices = antisymm_mat.nonzero()
        antisymm_mat.data /= self.n_unfolded_versions[row_indices]

        self.symmetries = symm_mat.tocsr()
        self.antisymmetries = antisymm_mat.tocsr()

        logging.info("%f seconds to build symmetry matrices" % (time.time() - start))
Project: pytfa | Author: EPFL-LCSB
def create_generalized_matrix(tmodel, array_type = 'dense'):
    """
    Returns the generalized stoichiomatric matrix used for TFA

    :param tmodel: pytfa.ThermoModel

    :returns: matrix.
    """

    if array_type not in ('DataFrame', 'dense') and not dok_matrix:
        raise ValueError('Sparse matrices require scipy')

    dtype = np.float64

    array_constructor = {'dense': np.zeros, 'dok': dok_matrix,
        'lil': lil_matrix, 'DataFrame': np.zeros, }

    n_constraints = len(tmodel.constraints)
    n_variables = len(tmodel.variables)
    array = array_constructor[array_type]((n_constraints, n_variables),
                                          dtype=dtype)

    c_ind = {x:e for e,x in enumerate(tmodel.constraints)}
    v_ind = {x:e for e,x in enumerate(tmodel.variables)}

    for this_cons in tmodel.constraints:
        var_coeff_dict = this_cons.get_linear_coefficients(this_cons.variables)

        for this_var,coeff in var_coeff_dict.items():
            array[c_ind[this_cons], v_ind[this_var]] = coeff

    if array_type == 'DataFrame':
        metabolite_ids = [met.id for met in tmodel.constraints]
        reaction_ids = [rxn.id for rxn in tmodel.variables]
        return pd.DataFrame(array, index=metabolite_ids, columns=reaction_ids)

    else:
        return array
Project: PyPSA | Author: PyPSA
def find_tree(sub_network, weight='x_pu'):
    """Get the spanning tree of the graph, choose the node with the
    highest degree as a central "tree slack" and then see for each
    branch which paths from the slack to each node go through the
    branch.

    """

    branches_bus0 = sub_network.branches()["bus0"]
    branches_i = branches_bus0.index
    buses_i = sub_network.buses_i()

    graph = sub_network.graph(weight=weight)
    sub_network.tree = nx.minimum_spanning_tree(graph)

    #find bus with highest degree to use as slack
    tree_slack_bus, slack_degree = max(degree(sub_network.tree), key=itemgetter(1))
    logger.info("Tree slack bus is %s with degree %d.", tree_slack_bus, slack_degree)

    #determine which buses are supplied in tree through branch from slack

    #matrix to store tree structure
    sub_network.T = dok_matrix((len(branches_i),len(buses_i)))

    for j,bus in enumerate(buses_i):
        path = nx.shortest_path(sub_network.tree,bus,tree_slack_bus)
        for i in range(len(path)-1):
            branch = next(iterkeys(graph[path[i]][path[i+1]]))
            branch_i = branches_i.get_loc(branch)
            sign = +1 if branches_bus0.iat[branch_i] == path[i] else -1
            sub_network.T[branch_i,j] = sign
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('BPMF', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('BPMF', 'numFactors'))

        self.beta0 = float(self.configHandler.getParameter('BPMF', 'beta0'))
        self.nu0 = float(self.configHandler.getParameter('BPMF', 'nu0'))
        self.wh0 = np.eye(self.numFactors)

        self.learnRate = float(self.configHandler.getParameter('BPMF', 'learning_rate'))
        self.regU = float(self.configHandler.getParameter('BPMF', 'regU'))
        self.regI = float(self.configHandler.getParameter('BPMF', 'regI'))

        self.P = np.random.normal(0, 1, size=(self.numUsers, self.numFactors))
        self.Q = np.random.normal(0, 1, size=(self.numItems, self.numFactors))

        self.alpha = 2
        self.alpha_k = self.alpha/self.numFactors

        self.numRatings = 5

        self.theta = np.random.dirichlet(np.array([self.alpha_k for i in range(self.numFactors)]))
        self.gamma = np.zeros((self.numUsers, self.numFactors, self.numItems))

        self.sigma = np.random.normal(0, 1, size = self.numRatings)
        self.omega = np.random.normal(0, 1, size = self.numUsers)

        self.mu_vd = 1.0 / (1.0 + np.exp(-(self.omega[newaxis, ...] + self.sigma[..., newaxis])))

        self.xi = 10.0
        self.nu = 10.0
        self.phi = 2.0
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        ''' Read the model parameters, and get some common values.
        '''
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('BPoissMF', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('BPoissMF', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('BPoissMF', 'threshold'))

        # Get the Parameters
        self.user_alpha = float(self.configHandler.getParameter('BPoissMF', 'user_alpha'))
        self.user_c = float(self.configHandler.getParameter('BPoissMF', 'user_c'))

        self.item_a = float(self.configHandler.getParameter('BPoissMF', 'item_a'))
        self.item_b = float(self.configHandler.getParameter('BPoissMF', 'item_b'))

        # The model parameters for users
        self.gamma0 = np.zeros(self.numUsers)
        self.gamma1 = np.zeros(self.numUsers)
        self.s = np.zeros(self.numUsers)
        self.nu = np.zeros((self.numUsers, self.numFactors))
        self.theta = np.zeros((self.numUsers, self.numFactors))

        # The model parameters for stick proportions
        self.tau = np.zeros((self.numUsers, self.numFactors))

        # The model parameters for item weights
        self.lambda0 = np.zeros((self.numItems, self.numFactors))
        self.lambda1 = np.zeros((self.numItems, self.numFactors))
        self.beta = np.zeros((self.numItems, self.numFactors))

        self.z = np.zeros((self.numUsers, self.numItems))

        self.pi = np.zeros((self.numUsers, self.numItems))
        self.logPi = np.zeros((self.numUsers, self.numItems))
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def _init_model(self):
        self.user_num, self.item_num = self.train_matrix.shape
        self.mean_rating = np.mean(list(self.train_matrix.values()))

        self.predictions = dok_matrix((self.user_num, self.item_num))

        if self.config_handler['Output', 'is_load', 'bool']:
            self._load_model()
            assert(self.user_factors.shape[1] == self.item_factors.shape[1])
            self.factor_num = self.user_factors.shape[1]
        else:
            self._read_cfg()

        if self.config_handler['Parameters', 'is_init_path', 'bool']:
            self._load_init_model()
        else:
            self.factor_num = self.config_handler['Parameters', 'factor_num', 'int']
            self.user_factors = np.random.normal(0, 1, size=(self.user_num, self.factor_num))
            self.item_factors = np.random.normal(0, 1, size=(self.item_num, self.factor_num))

        self.markov_num = 0
        validation_rmse, test_rmse = self.__evaluate_epoch__()
        self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(0, validation_rmse, test_rmse))

        self.user_normal_dist_mu0 = np.zeros(self.factor_num, float) + self.user_normal_dist_mu0_init
        self.user_normal_dist_beta0 = self.user_normal_dist_beta0_init
        self.user_Wishart_dist_W0 = np.eye(self.factor_num) * self.user_Wishart_dist_W0_init
        self.user_Wishart_dist_nu0 = self.factor_num

        self.item_normal_dist_mu0 = np.zeros(self.factor_num, float) + self.item_normal_dist_mu0_init
        self.item_normal_dist_beta0 = self.item_normal_dist_beta0_init
        self.item_Wishart_dist_W0 = np.eye(self.factor_num) * self.item_Wishart_dist_W0_init
        self.item_Wishart_dist_nu0 = self.factor_num

        self.rating_sigma = self.rating_sigma_init
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def _build_model(self):
        user_train_matrix = dict()
        item_train_matrix = dict()
        for user_id, item_id in self.train_matrix.keys():
            user_train_matrix.setdefault(user_id, dok_matrix((1, self.item_num)))
            user_train_matrix[user_id][0, item_id] = self.train_matrix.get((user_id, item_id))
            item_train_matrix.setdefault(item_id, dok_matrix((1, self.user_num)))
            item_train_matrix[item_id][0, user_id] = self.train_matrix.get((user_id, item_id))

        self.previous_loss = -np.inf
        max_iterations = self.config_handler['Parameters', 'max_iterations', 'int']
        for iteration in range(max_iterations):
            self.logger['Process'].debug('Epoch {0}: update hyper-parameters'.format(iteration))
            user_factors_mu, user_factors_variance = \
                self._sampling_hyperparameters(self.user_factors, self.user_normal_dist_mu0, self.user_normal_dist_beta0,
                                               self.user_Wishart_dist_nu0, self.user_Wishart_dist_W0)
            item_factors_mu, item_factors_variance = \
                self._sampling_hyperparameters(self.item_factors, self.item_normal_dist_mu0, self.item_normal_dist_beta0,
                                              self.item_Wishart_dist_nu0, self.item_Wishart_dist_W0)

            self.logger['Process'].debug('Epoch {0}: update latent factors'.format(iteration))
            for gibbs_iteration in range(2):
                for user_id in range(self.user_num):
                    user_ratings = user_train_matrix[user_id] if user_id in user_train_matrix else dict()
                    if len(user_ratings.keys()) == 0:
                        continue
                    self.user_factors[user_id] = self._update_parameters(
                        self.item_factors, user_ratings, user_factors_mu, user_factors_variance)

                for item_id in range(self.item_num):
                    item_ratings = item_train_matrix[item_id] if item_id in item_train_matrix else dict()
                    if len(item_ratings.keys()) == 0:
                        continue
                    self.item_factors[item_id] = self._update_parameters(
                        self.user_factors, item_ratings, item_factors_mu, item_factors_variance)

                validation_rmse, test_rmse = self.__evaluate_epoch__()
                self.logger['Process'].debug('Epoch {0}: Training RMSE - {1}, Testing RMSE - {2}'.format(iteration, validation_rmse, test_rmse))
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('BPoissMF', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('BPoissMF', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('BPoissMF', 'threshold'))

        # Get the Parameters
        self.a = float(self.configHandler.getParameter('BPoissMF', 'a'))
        self.ap = float(self.configHandler.getParameter('BPoissMF', 'ap'))
        self.bp = float(self.configHandler.getParameter('BPoissMF', 'bp'))

        self.c = float(self.configHandler.getParameter('BPoissMF', 'c'))
        self.cp = float(self.configHandler.getParameter('BPoissMF', 'cp'))
        self.dp = float(self.configHandler.getParameter('BPoissMF', 'dp'))

        # Init xi
        self.xi = gammaRnd(self.ap, self.ap/self.bp, size=self.numUsers)
        # Init theta
        self.theta = np.zeros((self.numUsers, self.numFactors))
        for i in range(self.numUsers):
            self.theta[i, :] = gammaRnd(self.a, self.xi[i])

        # Init eta
        self.eta = gammaRnd(self.cp, self.cp/self.dp, size=self.numItems)
        #Init beta
        self.beta = np.zeros((self.numItems, self.numFactors))
        for i in range(self.numItems):
            self.beta[i, :] = gammaRnd(self.c, self.eta[i])

        # Init z
        self.zs = np.zeros((self.numUsers, self.numItems, self.numFactors))
        for user_id, item_id in self.trainMatrix.keys():
            p = self.theta[user_id, :] * self.beta[item_id, :]
            p /= np.sum(p)
            self.zs[user_id, item_id, :] = np.random.multinomial(self.trainMatrix[user_id, item_id], p)
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        '''Initialize CTR model dimensions and read its hyper-parameters.'''
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandler.getParameter('CTR', 'MAX_Iterations'))
        self.numFactors = int(self.configHandler.getParameter('CTR', 'numFactors'))
        self.threshold = float(self.configHandler.getParameter('CTR', 'threshold'))

        self.U = np.zeros((self.numUsers, self.numFactors))
        self.V = np.zeros((self.numItems, self.numFactors))
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def get_train_matrix(self):
        if len(self.train_data.shape) == 2:
            return self.train_data
        train_matrix = dok_matrix((self.train_data.shape[0], self.train_data.shape[1]))
        for key in self.train_data.keys():
            train_matrix[key[0], key[1]] = self.train_data[key]
        return train_matrix
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def tensor_matrix(self, tensor_data):
        user_num, item_num = tensor_data.shape[0], tensor_data.shape[1]
        matrix_data = dok_matrix((user_num, item_num))
        for user_id, item_id, time_id in tensor_data.keys():
            matrix_data[user_id, item_id] += tensor_data.get((user_id, item_id, time_id))
        return matrix_data
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def read_given_train_test(self, train_file, test_file):
        """
        read given data set
        """
        users, items = set(), set()
        ratings = list()
        with codecs.open(train_file, mode="r", encoding="utf-8") as read_file:
            for line in read_file:
                user_item_rating = re.split('\t|,|::', line.strip())
                user_id = int(user_item_rating[0])
                item_id = int(user_item_rating[1])
                rating = int(user_item_rating[2])
                users.add(user_id)
                items.add(item_id)
                ratings.append((user_id, item_id, rating))

        # Convert
        user_num, item_num = len(users), len(items)
        users_dict = {user_id: index for index, user_id in enumerate(list(users))}
        items_dict = {item_id: index for index, item_id in enumerate(list(items))}
        train_matrix = dok_matrix((user_num, item_num))
        test_matrix = dok_matrix((user_num, item_num))
        for user_id, item_id, rating in ratings:
            train_matrix[users_dict[user_id], items_dict[item_id]] = rating

        with codecs.open(test_file, mode='r', encoding='utf-8') as read_file:
            for line in read_file:
                user_item_rating = re.split('\t|,|::', line.strip())
                user_id = int(user_item_rating[0])
                item_id = int(user_item_rating[1])
                rating = int(user_item_rating[2])
                test_matrix[users_dict[user_id], items_dict[item_id]] = rating
        return train_matrix, test_matrix
Project: GraphicalModelForRecommendation | Author: AlgorithmFan
def initModel(self):
        self.numUsers, self.numItems = self.trainMatrix.shape
        self.prediction = dok_matrix((self.numUsers, self.numItems))
        self.MAX_Iterations = int(self.configHandle.getParameter('PMF', 'MAX_Iterations'))
Project: nlp | Author: Shmuma
def __init__(self, dict_size, file_name, shuffle_buffer_size):
        self.dict_size = dict_size
        self.data = sparse.dok_matrix((dict_size, dict_size), dtype=np.uint32)
Project: soinn | Author: fukatani
def setUp(self):
        self.soinn = Soinn()
        self.soinn.nodes = np.array([[0, 0], [1, 0], [1, 1], [0, 1]], dtype=np.float64)
        self.soinn.adjacent_mat = dok_matrix((4, 4))
        self.soinn.winning_times = [1] * 4
Project: soinn | Author: fukatani
def test_increment_edge_ages(self):
        self.soinn.adjacent_mat[0, 1:3] = 1
        self.soinn.adjacent_mat[1:3, 0] = 1
        self.soinn._Soinn__increment_edge_ages(0)
        expected = dok_matrix([[0, 2, 2, 0], [2, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]])
        np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray())
        self.soinn._Soinn__increment_edge_ages(1)
        expected = dok_matrix([[0, 3, 2, 0], [3, 0, 0, 0], [2, 0, 0, 0], [0, 0, 0, 0]])
        np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected.toarray())
Project: soinn | Author: fukatani
def test_delete_old_edges(self):
        self.soinn.winning_times = [i for i in range(4)]
        m = self.soinn.max_edge_age
        self.soinn.adjacent_mat[[0, 1], [1, 0]] = m + 2
        self.soinn.adjacent_mat[[0, 2], [2, 0]] = m + 1
        self.soinn._Soinn__delete_old_edges(0)
        actual = self.soinn.adjacent_mat.toarray()
        expected = dok_matrix([[0, m+1, 0], [m+1, 0, 0], [0, 0, 0]]).toarray()
        np.testing.assert_array_equal(actual, expected)
        expected = np.array([[0, 0], [1, 1], [0, 1]], dtype=np.float64)
        np.testing.assert_array_equal(self.soinn.nodes, expected)
        self.assertEqual(self.soinn.winning_times, [0, 2, 3])
Project: soinn | Author: fukatani
def test_delete_old_edges_with_deleting_no_node(self):
        # No node is deleted by the function
        self.soinn.winning_times = [i for i in range(4)]
        m = self.soinn.max_edge_age
        self.soinn.adjacent_mat[[0, 1], [1, 0]] = m + 2
        self.soinn.adjacent_mat[[1, 2], [2, 1]] = 1
        previous_nodes = self.soinn.nodes
        previous_winning_times = self.soinn.winning_times
        self.soinn._Soinn__delete_old_edges(0)
        actual = self.soinn.adjacent_mat.toarray()
        expected = dok_matrix([[0, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 0]]).toarray()
        np.testing.assert_array_equal(actual, expected)
        np.testing.assert_array_equal(self.soinn.nodes, previous_nodes)
        self.assertEqual(self.soinn.winning_times, previous_winning_times)
Project: soinn | Author: fukatani
def test_delete_old_edges_with_deleting_several_nodes(self):
        # delete several nodes simultaneously
        self.soinn.winning_times = [i for i in range(4)]
        m = self.soinn.max_edge_age
        self.soinn.adjacent_mat[[0, 1, 0, 3], [1, 0, 3, 0]] = m + 2
        self.soinn.adjacent_mat[[0, 2], [2, 0]] = m + 1
        self.soinn._Soinn__delete_old_edges(0)
        actual = self.soinn.adjacent_mat.toarray()
        expected = dok_matrix([[0, m+1], [m+1, 0]]).toarray()
        np.testing.assert_array_equal(actual, expected)
        self.assertEqual(self.soinn.winning_times, [0, 2])
Project: soinn | Author: fukatani
def test_delete_nodes_with_deleting_several_nodes(self):
        # delete several nodes simultaneously
        self.soinn.winning_times = [i for i in range(4)]
        self.soinn.adjacent_mat[[0, 1], [1, 0]] = 1
        self.soinn.adjacent_mat[[2, 3], [3, 2]] = 2
        self.soinn._Soinn__delete_nodes([1, 3])
        expected = np.array([[0, 0], [1, 1]], dtype=np.float64)
        np.testing.assert_array_equal(self.soinn.nodes, expected)
        self.assertEqual(self.soinn.winning_times, [0, 2])
        expected = dok_matrix((2, 2)).toarray()
        np.testing.assert_array_equal(self.soinn.adjacent_mat.toarray(), expected)
Project: ngram2vec | Author: zhezhaoa
def multiply_by_rows(matrix, row_coefs):
    normalizer = dok_matrix((len(row_coefs), len(row_coefs)))
    normalizer.setdiag(row_coefs)
    return normalizer.tocsr().dot(matrix)
Project: ngram2vec | Author: zhezhaoa
def multiply_by_columns(matrix, col_coefs):
    normalizer = dok_matrix((len(col_coefs), len(col_coefs)))
    normalizer.setdiag(col_coefs)
    return matrix.dot(normalizer.tocsr())
Project: ngram2vec | Author: zhezhaoa
def normalize(self):
        m2 = self.m.copy()
        m2.data **= 2
        norm = np.reciprocal(np.sqrt(np.array(m2.sum(axis=1))[:, 0]))
        normalizer = dok_matrix((len(norm), len(norm)))
        normalizer.setdiag(norm)
        self.m = normalizer.tocsr().dot(self.m)
Project: discreteMarkovChain | Author: gvanderheide
def directInitialMatrix(self):   
        """
        We generate an initial sparse matrix with all the transition rates (or probabilities).
        We later transform this matrix into a rate or probability matrix depending on the preferred method of obtaining pi.
        """

        #First initialize state codes and the mapping with states. 
        self.setStateCodes()  

        #For each state, calculate the indices of reached states and rates using the transition function.
        results = map(self.transitionStates, self.mapping.values())

        #Simpler alternative that uses less memory.
        #Would be competitive if the conversion from dok to csr is faster.
#        D = dok_matrix((self.size,self.size),dtype=float)
#        for index,(col,rate) in enumerate(results):
#            D.update({(index,c): r for c,r in zip(col,rate)})
#        return D.tocsr()

        #preallocate memory for the rows, cols and rates of the sparse matrix      
        rows = np.empty(self.size,dtype=int)
        cols = np.empty(self.size,dtype=int)
        rates = np.empty(self.size,dtype=float)

        #now fill the arrays with the results, increasing their size if current memory is too small.
        right = 0
        for index,(col,rate) in enumerate(results): #more robust alternative: in izip(self.mapping.keys(),results)
            left = right
            right += len(col)
            if right >= len(cols):
                new_capacity = int(round(right * 1.5))  #increase the allocated memory if the vectors turn out to be too small.
                cols.resize(new_capacity)
                rates.resize(new_capacity)
                rows.resize(new_capacity)
            rows[left:right] = index #since states are sorted, the index indeed corresponds to the state.
            cols[left:right] = col
            rates[left:right] = rate   

        #Place all data in a coo_matrix and convert to a csr_matrix for quick computations.
        return coo_matrix((rates[:right],(rows[:right],cols[:right])),shape=(self.size,self.size)).tocsr()
Project: Quadflor | Author: quadflor
def imitate_tr(self, graph, root):
        def tr():
            pass

        tr.nx_graph = graph
        tr.nx_root = root
        return tr
        #
        # def test_speed(self):
        #     _, _, tr = load_dataset('econ62k')
        #     graph = tr.nx_graph
        #
        #     def random_labels():
        #         def set_random_ones(n_nodes):
        #             ids = np.random.choice(n_nodes, 5)
        #             zeros = sp.dok_matrix((1, n_nodes), dtype=np.bool_)
        #             for index in ids:
        #                 zeros[0, index] = True
        #             return zeros
        #
        #         number_of_nodes = graph.number_of_nodes()
        #         matrix = set_random_ones(number_of_nodes)
        #         for i in range(0, 62000):
        #             zeros = set_random_ones(number_of_nodes)
        #             matrix = sp.vstack((matrix, zeros))
        #         return sp.csr_matrix(matrix)
        #
        #     y_true = random_labels()
        #     y_pred = random_labels()
        #     print('random constructed')
        #
        #     start = default_timer()
        #     hierarchical_f_measure(graph, y_true, y_pred)
        #     print(default_timer() - start)
Project: Quadflor | Author: quadflor
def _make_sparse(self, scores):
        n_features = len(self.vocabulary)
        result = sp.csr_matrix((0, n_features))
        for score in scores:
            sparse_score = sp.dok_matrix((1, n_features))
            for s in score.items():
                sparse_score[0, self.vocabulary[s[0]]] = s[1]
            result = sp.vstack((result, sp.csr_matrix(sparse_score)))
        return result
Project: reinforcement_learning | Author: nishio
def init_Q():
    from scipy.sparse import dok_matrix
    return dok_matrix((17 ** 16, 16 * 16))
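
A note on this last example: because DOK storage is a hash map keyed by (row, column), declaring an astronomically large nominal shape costs nothing up front; memory grows only with the Q-values actually written, which is what makes a table over 17 ** 16 states representable at all (subject to the integer index limits of the installed SciPy build).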