Python scipy.sparse 模块,hstack() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用scipy.sparse.hstack()

项目:kaggle-review    作者:daxiongshu    | 项目源码 | 文件源码
def onehot_encode_bar(tr,te,cols=None,bar=10000):
    """One-hot encode categorical columns and append the remaining columns.

    A column is treated as categorical when its number of unique values in
    ``tr`` lies strictly between 2 and ``bar``, or when its dtype in ``tr``
    is ``object``.  Categorical columns are converted to ``str`` IN PLACE in
    both ``tr`` and ``te`` and encoded with a ``DictVectorizer`` fitted on
    ``tr``.  All other columns are stacked to the right as they are.

    Parameters
    ----------
    tr, te : train / test frames (indexed like pandas DataFrames).
    cols : columns to use; defaults to the columns of ``tr`` also in ``te``.
    bar : exclusive upper bound on unique values for a categorical column.

    Returns
    -------
    (X, Xt) : CSR matrices for ``tr`` and ``te``.
    """
    if cols is None:
        test_columns = te.columns.values
        cols = [name for name in tr.columns.values if name in test_columns]

    categorical, numeric = [], []
    for name in cols:
        n_unique = tr[name].unique().shape[0]
        is_categorical = (2 < n_unique < bar) or tr[name].dtype == 'object'
        if not is_categorical:
            numeric.append(name)
            continue
        categorical.append(name)
        # String-cast in place so the vectorizer sees categories, not numbers.
        tr[name] = tr[name].map(str)
        te[name] = te[name].map(str)

    print("start fitting num of cat features:", len(categorical))
    vectorizer = DictVectorizer()
    # Transposing first makes to_dict() produce one {column: value} per row.
    train_records = tr[categorical].T.to_dict().values()
    X = vectorizer.fit_transform(train_records)
    test_records = te[categorical].T.to_dict().values()
    Xt = vectorizer.transform(test_records)
    print("done fitting", X.shape, Xt.shape)

    X = sparse.hstack([X, tr[numeric].values], format='csr')
    Xt = sparse.hstack([Xt, te[numeric].values], format='csr')
    return X, Xt
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveE2CC(self):
    """Return the (cached) edge-to-cell-center averaging operator.

    Only the symmetric case is implemented; any other mesh raises
    ``NotImplementedError``.  The result is stored on ``self._aveE2CC``.
    """
    if getattr(self, '_aveE2CC', None) is not None:
        return self._aveE2CC

    # The number of cell centers in each direction
    n = self.vnC
    if not self.isSymmetric:
        # Reference sketch of the non-symmetric operator, kept from the
        # original source:
        # self._aveE2CC = (1./3)*sp.hstack((utils.kron3(utils.av(n[2]),
        #                                               utils.av(n[1]),
        #                                               utils.speye(n[0])),
        #                                   utils.kron3(utils.av(n[2]),
        #                                               utils.speye(n[1]),
        #                                               utils.av(n[0])),
        #                                   utils.kron3(utils.speye(n[2]),
        #                                               utils.av(n[1]),
        #                                               utils.av(n[0]))),
        #                                  format="csr")
        raise NotImplementedError(
            'wrapping in the averaging is not yet implemented'
        )

    # Drop the first column of the first-direction average, then set the
    # leading entry to 1.
    avR = utils.av(n[0])[:, 1:]
    avR[0, 0] = 1.
    self._aveE2CC = sp.kron(utils.av(n[2]), avR, format="csr")
    return self._aveE2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def faceDiv(self):
    """
    Return the (cached) divergence operator (face-stg to cell-centres).

    The difference operators are stacked per dimension, then scaled by
    ``sdiag(1/self.vol)`` on the left and ``sdiag(self.area)`` on the right.
    """
    if getattr(self, '_faceDiv', None) is not None:
        return self._faceDiv

    n = self.vnC
    # Compute faceDivergence operator on faces
    if self.dim == 1:
        D = ddx(n[0])
    elif self.dim == 2:
        blocks = (
            sp.kron(speye(n[1]), ddx(n[0])),
            sp.kron(ddx(n[1]), speye(n[0])),
        )
        D = sp.hstack(blocks, format="csr")
    elif self.dim == 3:
        blocks = (
            kron3(speye(n[2]), speye(n[1]), ddx(n[0])),
            kron3(speye(n[2]), ddx(n[1]), speye(n[0])),
            kron3(ddx(n[2]), speye(n[1]), speye(n[0])),
        )
        D = sp.hstack(blocks, format="csr")

    # Compute areas of cell faces & volumes
    S = self.area
    V = self.vol
    self._faceDiv = sdiag(1/V)*D*sdiag(S)
    return self._faceDiv
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_invXXXBlockDiagonal(self):
    """Check the 2x2 and 3x3 block-diagonal inverses against ``B*A == I``.

    For each block size, random diagonals are drawn, assembled into a block
    matrix ``A`` and multiplied by the matrix returned from the inverse
    helper; the 2-norm of the residual must be below ``TOL``.
    """
    cases = ((2, inv2X2BlockDiagonal), (3, inv3X3BlockDiagonal))
    for size, invert in cases:
        a = [np.random.rand(5, 1) for i in range(size * size)]
        B = invert(*a)

        # Row-major assembly: block (r, c) is the diagonal a[r*size + c].
        block_rows = [
            sp.hstack([sdiag(d) for d in a[r * size:(r + 1) * size]])
            for r in range(size)
        ]
        A = sp.vstack(block_rows)

        residual = B*A - sp.identity(5 * size)
        self.assertTrue(
            np.linalg.norm(residual.todense().ravel(), 2) < TOL
        )
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDeriv(self):
    """Derivative check of the face inner product with a 3-column model."""

    def fun(x):
        # fake anisotropy (testing anistropic implementation with isotropic
        # vector). First order behavior expected for fully anisotropic
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])

        MfSig = self.mesh.getFaceInnerProduct(model)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(model0)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDerivInvProp(self):
    """Derivative check of the face inner product with ``invProp=True``."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])

        MfSig = self.mesh.getFaceInnerProduct(model, invProp=True)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(model0, invProp=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic InvProp')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_FaceInnerProductAnisotropicDerivInvMat(self):
    """Derivative check of the face inner product with ``invMat=True``."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([eye, zero, eye])])

        MfSig = self.mesh.getFaceInnerProduct(model, invMat=True)
        MfSigDeriv = self.mesh.getFaceInnerProductDeriv(model0, invMat=True)
        return MfSig*self.face_vec, MfSigDeriv(self.face_vec) * P.T

    print('Testing FaceInnerProduct Anisotropic InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDeriv(self):
    """Derivative check of the edge inner product with a 3-column model."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])

        MeSig = self.mesh.getEdgeInnerProduct(model.reshape(self.mesh.nC, 3))
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(model0)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDerivInvProp(self):
    """Derivative check of the edge inner product with ``invProp=True``."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])

        MeSig = self.mesh.getEdgeInnerProduct(model, invProp=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(model0, invProp=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvProp')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDerivInvMat(self):
    """Derivative check of the edge inner product with ``invMat=True``."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])

        MeSig = self.mesh.getEdgeInnerProduct(model, invMat=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(model0, invMat=True)
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def test_EdgeInnerProductAnisotropicDerivInvPropInvMat(self):
    """Derivative check of the edge inner product with both inverse flags."""

    def fun(x):
        # Repeat the model three times to give it an anisotropic layout.
        model = np.repeat(np.atleast_2d(x), 3, axis=0).T
        model0 = np.repeat(self.x0, 3, axis=0).T

        nC = self.mesh.nC
        zero = sp.csr_matrix((nC, nC))
        eye = sp.eye(nC)
        P = sp.vstack([sp.hstack([zero, eye, zero])])

        MeSig = self.mesh.getEdgeInnerProduct(model, invProp=True, invMat=True)
        MeSigDeriv = self.mesh.getEdgeInnerProductDeriv(
            model0, invProp=True, invMat=True
        )
        return MeSig*self.edge_vec, MeSigDeriv(self.edge_vec) * P.T

    print('Testing EdgeInnerProduct Anisotropic InvProp InvMat')
    passed = Tests.checkDerivative(fun, self.x0, num=7,
                                   tolerance=TOLD, plotIt=False)
    return self.assertTrue(passed)
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def align_vectors(instances, target_vocabulary, source_vocabulary):
    """Re-order the columns of ``instances`` to follow ``target_vocabulary``.

    Column ``j`` of the result is the column of ``instances`` whose feature
    (as listed in ``source_vocabulary``) equals ``target_vocabulary[j]``.
    Target features missing from the source become all-zero columns.

    Parameters
    ----------
    instances : scipy sparse matrix, one column per ``source_vocabulary`` entry.
    target_vocabulary : sequence of features defining the output columns.
    source_vocabulary : sequence of features naming the input columns.

    Returns
    -------
    scipy.sparse.csr_matrix of shape
    ``(instances.shape[0], len(target_vocabulary))``.
    """
    source_feature_indices = {feature: i for i, feature in enumerate(source_vocabulary)}
    target_feature_indices = {feature: i for i, feature in enumerate(target_vocabulary)}
    # target column index -> source column index, for shared features only
    transform_dict = {
        target_feature_indices[feature]: source_index
        for feature, source_index in source_feature_indices.items()
        if feature in target_feature_indices
    }

    num_instances = instances.shape[0]
    num_targets = len(target_vocabulary)
    if num_targets == 0:
        # sparse.hstack cannot stack an empty list of blocks.
        return sparse.csr_matrix((num_instances, 0), dtype=instances.dtype)

    # One shared empty column for every feature the source does not have.
    zero_column = sparse.csr_matrix((num_instances, 1), dtype=int)
    columns = []
    for index in range(num_targets):
        source_index = transform_dict.get(index)
        if source_index is None:
            columns.append(zero_column)
        else:
            # Errors from getcol (e.g. a vocabulary longer than the matrix)
            # now propagate; they used to be hidden as zero columns.
            columns.append(instances.getcol(source_index))
    return sparse.hstack(columns).tocsr()
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def return_instances(self, helpernames):
    """
    Information extractor
    =====
    Combine the featurized instances of the requested feature types

    Parameters
    ------
    helpernames : list
        Feature types to combine, in output column order
        Each name is looked up in self.features and self.vocabularies

    Returns
    -----
    instances : scipy csr matrix
        The per-type feature matrices stacked side by side
    vocabulary : numpy array
        The per-type vocabularies concatenated in the same order
    """
    feature_blocks = []
    vocabulary_blocks = []
    for name in helpernames:
        feature_blocks.append(self.features[name])
        vocabulary_blocks.append(self.vocabularies[name])
    instances = sparse.hstack(feature_blocks).tocsr()
    vocabulary = np.hstack(vocabulary_blocks)
    return instances, vocabulary
项目:grove    作者:rigetticomputing    | 项目源码 | 文件源码
def __init__(self, labels_ops):
    """
    Store a labelled set of operators and build their basis-change matrix.

    :param (list|tuple) labels_ops: Sequence of tuples (label, operator) where label is a string
        and operator a qutip.Qobj operator representation.
    """
    ops_by_label = OrderedDict(labels_ops)
    self.ops_by_label = ops_by_label
    self.labels = list(ops_by_label.keys())
    self.ops = list(ops_by_label.values())
    self.dim = len(self.ops)

    # Each operator is vectorized and becomes one column of the basis
    # change matrix. CSR is enforced for efficient matrix vector products.
    vectorized_ops = [qt.operator_to_vector(opj).data for opj in self.ops]
    self.basis_transform = sphstack(vectorized_ops).tocsr()

    # Initialized empty; not assigned anywhere else in this block.
    self._metric = None
    self._is_orthonormal = None
    self._all_hermitian = None
项目:corpus-to-graph-ml    作者:CatalystCode    | 项目源码 | 文件源码
def get_compound_features(train_data, test_data, feature_gen_methods):
    """Run several feature generators and stack their outputs side by side.

    Each entry of ``feature_gen_methods`` is called as
    ``method(train_data, test_data)`` and must return a
    ``(train_features, test_features)`` pair.  The train outputs are
    stacked horizontally in call order, and likewise the test outputs.

    With a single generator its outputs are returned unchanged.  An empty
    ``feature_gen_methods`` raises ``IndexError``.
    """
    train_features_list = []
    test_features_list = []

    for generate in feature_gen_methods:
        train_features, test_features = generate(train_data, test_data)
        train_features_list.append(train_features)
        test_features_list.append(test_features)

    train_features = train_features_list[0]
    test_features = test_features_list[0]

    if len(train_features_list) > 1:
        # One hstack over all blocks replaces the pairwise loop, which used
        # the Python-2-only ``xrange``.
        train_features = hstack(train_features_list)
        test_features = hstack(test_features_list)

    return train_features, test_features
项目:blmath    作者:bodylabs    | 项目源码 | 文件源码
def test_cholmod(self):
    """Run ``lchol.lchol`` on ``Ac * Ac.T`` built from a pickled fixture."""
    A, chol_L, _, cv = pickle.load(vc('/unittest/linalg/cholmod.pkl'))

    # Extra column c: weight 1/len(cv) on the rows listed in cv.
    n_cv = len(cv)
    c_data = np.ones(n_cv)/n_cv
    c_rows = cv.flatten()
    c_cols = (np.zeros(n_cv)).astype(np.int32)
    c = sp.csc_matrix((c_data, (c_rows, c_cols)), shape=(A.shape[0], 1))
    Ac = sp.hstack([A, c], format='csc')

    AAc = Ac.dot(Ac.T)

    chol_L_comp, L_nonpsd, chol_S_comp = lchol.lchol(AAc)

    right = chol_S_comp.T.dot(AAc.dot(chol_S_comp))
    left = chol_L_comp.dot(chol_L_comp.T)

    # NOTE(review): this passes when the summed residual is NON-zero; a
    # tolerance comparison may have been intended -- confirm.
    residual_sum = sum((abs(right-left)).data)
    self.assertTrue(residual_sum)  # it's a reordered LLt decomposition
    self.assertEqual(sp.triu(chol_L, k=1).nnz, 0) # it's lower triangular'
    self.assertEqual(L_nonpsd, 0)                 # the input is positive definite
    # self.assertTrue(sum((abs(chol_L - chol_L_comp)).data) < 1e-1)
    # self.assertTrue(sum((abs(chol_S - chol_S_comp)).data) < 1e-1)
项目:Kaggler    作者:qqgeogor    | 项目源码 | 文件源码
def transform(self, X):
    """Encode categorical columns into sparse matrix with one-hot-encoding.

    Each column is encoded with ``self._transform_col``; columns for which
    it returns ``None`` are skipped.  The previous version only seeded the
    result from column 0, so it failed whenever that column was skipped.

    Args:
        X (numpy.array): categorical columns to encode

    Returns:
        X_new (scipy.sparse.coo_matrix): sparse matrix encoding categorical
                                         variables into dummy variables.
                                         If only one column is encoded, its
                                         matrix is returned as is; if none
                                         is, the result has zero columns.
    """
    encoded = []
    for col in range(X.shape[1]):
        X_col = self._transform_col(X[:, col], col)
        if X_col is not None:
            encoded.append(X_col)

        logging.debug('{} --> {} features'.format(
            col, self.label_encoder.label_maxes[col])
        )

    if not encoded:
        return sparse.coo_matrix((X.shape[0], 0))
    if len(encoded) == 1:
        return encoded[0]
    return sparse.hstack(encoded)
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def _propagate_features(self, task):
    """Copy the propagated input columns into the front of the output.

    Dense input is written into ``predict_out`` in place.  Sparse input
    replaces ``self.job.predict_out`` with a new LIL matrix.
    """
    job = self.job
    p_out, p_in = job.predict_out, job.predict_in

    # The input may have more rows than the output (i.e. with blendindex);
    # the leading surplus rows are skipped.
    r = int(p_in.shape[0] - p_out.shape[0])

    if issparse(p_in):
        # Rebuild the output with scipy sparse hstack.
        propagated = p_in[r:, task.propagate_features]
        remainder = p_out[:, task.n_feature_prop:]
        self.job.predict_out = hstack([propagated, remainder]).tolil()
        return

    # Simple item setting
    p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def transform(self, X):
    """Apply every transformer to X and concatenate the outputs column-wise.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features)
        Input data to be transformed.

    Returns
    -------
    X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
        hstack of results of transformers. sum_n_components is the
        sum of n_components (output dimension) over transformers.
        The result is CSR if any transformer output is sparse.
    """
    jobs = (
        delayed(_transform_one)(trans, name, X, self.transformer_weights)
        for name, trans in self.transformer_list
    )
    Xs = Parallel(n_jobs=self.n_jobs)(jobs)

    for block_out in Xs:
        if sparse.issparse(block_out):
            return sparse.hstack(Xs).tocsr()
    return np.hstack(Xs)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _transform_dense(self, X):
    """Expand a dense array into ``2 * sample_steps - 1`` stacked blocks.

    The first block holds ``sqrt(X * sample_interval_)``.  Each further
    step ``j`` adds a cosine and a sine block.  Zero entries of ``X`` stay
    zero in every block.
    """
    interval = self.sample_interval_
    mask = (X != 0.0)
    nz_values = X[mask]

    def scatter(values):
        # Place the non-zero values back into a zero array shaped like X.
        out = np.zeros_like(X)
        out[mask] = values
        return out

    pieces = [scatter(np.sqrt(nz_values * interval))]

    log_step_nz = interval * np.log(nz_values)
    step_nz = 2 * nz_values * interval

    for j in range(1, self.sample_steps):
        factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * interval))
        pieces.append(scatter(factor_nz * np.cos(j * log_step_nz)))
        pieces.append(scatter(factor_nz * np.sin(j * log_step_nz)))

    return np.hstack(pieces)
项目:deeppavlov    作者:deepmipt    | 项目源码 | 文件源码
def vectorize_select_from_data(data, vectorizers, selectors):
    """Vectorize data and select features.

    All vectorizers except the last are applied to ``data``.  The last one
    is applied to ``ngrams_you_are(data)``.  Each output goes through the
    selector at the same position when that selector is not ``None``.  All
    outputs are stacked horizontally in order.

    The blocks are stacked in one call, so a single vectorizer now works;
    it used to fail trying to stack onto ``None``.

    Args:
        data: list of text train samples
        vectorizers: list of vectorizers
        selectors: list of selectors

    Returns:
        features extracted from data using vectorizers and selectors lists
    """
    def featurize(samples, vectorizer, selector):
        # Vectorize, then optionally reduce with the paired selector.
        x_i = vectorizer.transform(samples)
        if selector is not None:
            x_i = selector.transform(x_i)
        return x_i

    num_ngrams = len(vectorizers) - 1
    blocks = [
        featurize(data, vectorizers[i], selectors[i])
        for i in range(num_ngrams)
    ]

    data_special = ngrams_you_are(data)
    blocks.append(featurize(data_special, vectorizers[-1], selectors[-1]))
    return sp.hstack(blocks)
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveE2CC(self):
    """Return the (cached) edge-to-cell-center averaging operator.

    In 3D this is the horizontal stack of the x, y and z edge averages
    divided by ``self.dim``.  In 2D an ``Exception`` is raised.
    """
    if getattr(self, '_aveE2CC', None) is not None:
        return self._aveE2CC
    if self.dim == 2:
        raise Exception('aveE2CC not implemented in 2D')
    if self.dim == 3:
        edge_averages = [self.aveEx2CC, self.aveEy2CC, self.aveEz2CC]
        self._aveE2CC = 1./self.dim*sp.hstack(edge_averages)
    return self._aveE2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveF2CC(self):
    """Return the (cached) face-to-cell-center averaging operator.

    The per-direction face averages (two in 2D, three in 3D) are stacked
    horizontally as CSR and divided by ``self.dim``.
    """
    if getattr(self, '_aveF2CC', None) is not None:
        return self._aveF2CC

    face_averages = None
    if self.dim == 2:
        face_averages = [self.aveFx2CC, self.aveFy2CC]
    elif self.dim == 3:
        face_averages = [self.aveFx2CC, self.aveFy2CC, self.aveFz2CC]

    if face_averages is not None:
        self._aveF2CC = 1./self.dim*sp.hstack(face_averages).tocsr()
    return self._aveF2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def faceDiv(self):
    """Construct divergence operator (face-stg to cell-centres).

    A symmetric mesh stacks only the x and z face divergences.  Otherwise,
    with more than one cell in y, all three are stacked.  The result is
    cached on ``self._faceDiv``.

    Raises
    ------
    NotImplementedError
        If the mesh is not symmetric and ``self.nCy`` is not greater than 1.
        This case used to fail on an unbound local variable.
    """
    if getattr(self, '_faceDiv', None) is None:
        # Compute faceDivergence operator on faces
        D1 = self.faceDivx
        D3 = self.faceDivz
        if self.isSymmetric:
            D = sp.hstack((D1, D3), format="csr")
        elif self.nCy > 1:
            D2 = self.faceDivy
            D = sp.hstack((D1, D2, D3), format="csr")
        else:
            raise NotImplementedError(
                'faceDiv is not implemented for a non-symmetric mesh '
                'with a single cell in y'
            )
        self._faceDiv = D
    return self._faceDiv
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveF2CC(self):
    """Return the (cached) face-to-cell-center averaging operator.

    Only the symmetric case is implemented; any other mesh raises
    ``NotImplementedError``.  The result is stored on ``self._aveF2CC``.
    """
    if getattr(self, '_aveF2CC', None) is not None:
        return self._aveF2CC

    n = self.vnC
    if not self.isSymmetric:
        # Reference sketch of the non-symmetric operator, kept from the
        # original source:
        # self._aveF2CC = (1./3.)*sp.hstack((utils.kron3(utils.speye(n[2]),
        #                                                utils.speye(n[1]),
        #                                                utils.av(n[0])),
        #                                    utils.kron3(utils.speye(n[2]),
        #                                                utils.av(n[1]),
        #                                                utils.speye(n[0])),
        #                                    utils.kron3(utils.av(n[2]),
        #                                                utils.speye(n[1]),
        #                                                utils.speye(n[0]))),
        #                                   format="csr")
        raise NotImplementedError(
            'wrapping in the averaging is not yet implemented'
        )

    # Drop the first column of the first-direction average, then set the
    # leading entry to 1.
    avR = utils.av(n[0])[:, 1:]
    avR[0, 0] = 1.
    first_block = sp.kron(utils.speye(n[2]), avR)
    second_block = sp.kron(utils.av(n[2]), utils.speye(n[0]))
    self._aveF2CC = (0.5)*sp.hstack((first_block, second_block),
                                    format="csr")
    return self._aveF2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def edgeCurl(self):
    """
    Return the (cached) edge curl operator for a 2D or 3D mesh.

    2D: ``hstack((-D21, D12)) * sdiag(1/area)``.
    3D: ``sdiag(1/area) * (C * sdiag(edge))`` with ``C`` the 3x3 block
    matrix of difference operators assembled below.
    """
    if getattr(self, '_edgeCurl', None) is not None:
        return self._edgeCurl

    assert self.dim > 1, "Edge Curl only programed for 2 or 3D."

    n = self.vnC  # The number of cell centers in each direction
    L = self.edge  # Compute lengths of cell edges
    S = self.area # Compute areas of cell faces

    if self.dim == 2:
        D21 = sp.kron(ddx(n[1]), speye(n[0]))
        D12 = sp.kron(speye(n[1]), ddx(n[0]))
        C = sp.hstack((-D21, D12), format="csr")
        self._edgeCurl = C*sdiag(1/S)

    elif self.dim == 3:
        D32 = kron3(ddx(n[2]), speye(n[1]), speye(n[0]+1))
        D23 = kron3(speye(n[2]), ddx(n[1]), speye(n[0]+1))
        D31 = kron3(ddx(n[2]), speye(n[1]+1), speye(n[0]))
        D13 = kron3(speye(n[2]), speye(n[1]+1), ddx(n[0]))
        D21 = kron3(speye(n[2]+1), ddx(n[1]), speye(n[0]))
        D12 = kron3(speye(n[2]+1), speye(n[1]), ddx(n[0]))

        # Zero blocks for the diagonal, sized from their neighbours.
        O1 = spzeros(np.shape(D32)[0], np.shape(D31)[1])
        O2 = spzeros(np.shape(D31)[0], np.shape(D32)[1])
        O3 = spzeros(np.shape(D21)[0], np.shape(D13)[1])

        block_rows = (
            sp.hstack((O1, -D32, D23)),
            sp.hstack((D31, O2, -D13)),
            sp.hstack((-D21, D12, O3)),
        )
        C = sp.vstack(block_rows, format="csr")

        self._edgeCurl = sdiag(1/S)*(C*sdiag(L))
    return self._edgeCurl
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveF2CC(self):
    """Return the (cached) face-to-cell-center averaging operator.

    1D uses the x-face average directly.  2D and 3D stack the per-direction
    face averages as CSR and scale them by 1/2 and 1/3 respectively.
    """
    if getattr(self, '_aveF2CC', None) is not None:
        return self._aveF2CC

    if self.dim == 1:
        self._aveF2CC = self.aveFx2CC
    elif self.dim == 2:
        face_averages = (self.aveFx2CC, self.aveFy2CC)
        self._aveF2CC = (0.5)*sp.hstack(face_averages, format="csr")
    elif self.dim == 3:
        face_averages = (self.aveFx2CC, self.aveFy2CC, self.aveFz2CC)
        self._aveF2CC = (1./3.)*sp.hstack(face_averages, format="csr")
    return self._aveF2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def aveE2CC(self):
    """Construct the averaging operator on cell edges to cell centers.

    1D uses the x-edge average directly.  2D and 3D stack the per-direction
    edge averages as CSR and scale them by 1/2 and 1/3 respectively.

    The result is cached on ``self._aveE2CC``.  The previous version
    checked ``_aveE2CC`` but stored into ``_avE2CC``, so the cache never
    hit and the operator was rebuilt on every access.
    """
    if getattr(self, '_aveE2CC', None) is None:
        if self.dim == 1:
            self._aveE2CC = self.aveEx2CC
        elif self.dim == 2:
            self._aveE2CC = 0.5*sp.hstack(
                (self.aveEx2CC, self.aveEy2CC), format="csr"
            )
        elif self.dim == 3:
            self._aveE2CC = (1./3)*sp.hstack((
                self.aveEx2CC, self.aveEy2CC, self.aveEz2CC
            ), format="csr")
    return self._aveE2CC
项目:discretize    作者:simpeg    | 项目源码 | 文件源码
def makePropertyTensor(M, tensor):
    """Build the sparse property matrix ``Sigma`` for mesh ``M``.

    ``None`` defaults to ones and a scalar is broadcast to ``M.nC`` values.
    The layout then depends on ``TensorType(M, tensor)``:

    * 1: the diagonal of values repeated ``M.dim`` times (Kronecker product)
    * 2: a single diagonal of all values
    * otherwise a symmetric block matrix from 3 columns (2D) or 6 columns
      (3D) of the tensor reshaped in Fortran order

    Any other shape raises ``Exception``.
    """
    if tensor is None:  # default is ones
        tensor = np.ones(M.nC)

    if isScalar(tensor):
        tensor = tensor * np.ones(M.nC)

    def symmetric_blocks(columns, layout):
        # layout[r][c] is the column of `columns` placed at block (r, c).
        block_rows = [
            sp.hstack([sdiag(columns[:, k]) for k in row])
            for row in layout
        ]
        return sp.vstack(block_rows)

    propType = TensorType(M, tensor)
    if propType == 1:  # Isotropic!
        return sp.kron(sp.identity(M.dim), sdiag(mkvc(tensor)))
    if propType == 2:  # Diagonal tensor
        return sdiag(mkvc(tensor))
    if M.dim == 2 and tensor.size == M.nC*3:  # Fully anisotropic, 2D
        columns = tensor.reshape((M.nC, 3), order='F')
        return symmetric_blocks(columns, ((0, 2), (2, 1)))
    if M.dim == 3 and tensor.size == M.nC*6:  # Fully anisotropic, 3D
        columns = tensor.reshape((M.nC, 6), order='F')
        return symmetric_blocks(columns, ((0, 3, 4), (3, 1, 5), (4, 5, 2)))
    raise Exception('Unexpected shape of tensor')
项目:quoll    作者:LanguageMachines    | 项目源码 | 文件源码
def offspring_crossover(parents,npoints=1):
    """Build one offspring by alternating segments of the first two parents.

    ``npoints`` distinct crossover positions are drawn from the column
    range.  Segments between consecutive positions are taken in turn from
    ``parents[0]`` and ``parents[1]`` and stacked horizontally.

    Parameters
    ----------
    parents : sparse matrix; rows 0 and 1 are the two parents.
    npoints : number of crossover positions.

    Returns
    -------
    scipy.sparse.csr_matrix : the offspring row.

    Raises
    ------
    ValueError
        If ``npoints`` exceeds the number of columns.  That many distinct
        positions cannot exist, and the sampling loop would never end.
    """
    dimensions = parents.shape[1]
    if npoints > dimensions:
        raise ValueError(
            'npoints ({}) cannot exceed the number of dimensions ({})'.format(
                npoints, dimensions
            )
        )

    # Redraw until all positions are distinct. `dimensions` is appended so
    # the last segment runs to the end of the row.
    crossover_points = []
    while len(set(crossover_points)) < npoints+1:
        crossover_points = sorted([random.choice(range(dimensions)) for point in range(npoints)] + [dimensions])

    parent_switch = 0
    point1 = 0
    segments = []
    for crossover in crossover_points:
        segments.append(return_segment(parents[parent_switch],point1,crossover))
        parent_switch = 1 - parent_switch  # alternate between parent 0 and 1
        point1 = crossover
    offspring = sparse.hstack(segments).tocsr()
    return offspring
项目:kindred    作者:jakelever    | 项目源码 | 文件源码
def _vectorize(self,corpus,fit):
    """Turn ``corpus`` into one horizontally stacked feature matrix.

    Each chosen feature's function produces the data for the corpus.  With
    ``fit`` true, a ``DictVectorizer`` (and a ``TfidfTransformer`` when
    tf-idf applies to that feature) is fitted and stored, but only if the
    feature produced any non-empty entry.  With ``fit`` false, only features
    that already have a stored vectorizer are transformed.
    """
    assert isinstance(corpus,kindred.Corpus)

    matrices = []
    for feature in self.chosenFeatures:
        assert feature in self.featureInfo.keys()
        info = self.featureInfo[feature]
        featureFunction = info['func']
        never_tfidf = info['never_tfidf']
        data = featureFunction(corpus)
        notEmpty = any( len(d)>0 for d in data )

        if fit:
            if not notEmpty:
                continue
            vectorizer = DictVectorizer()
            self.dictVectorizers[feature] = vectorizer
            if self.tfidf and not never_tfidf:
                tfidfTransformer = TfidfTransformer()
                self.tfidfTransformers[feature] = tfidfTransformer
                intermediate = vectorizer.fit_transform(data)
                matrices.append(tfidfTransformer.fit_transform(intermediate))
            else:
                matrices.append(vectorizer.fit_transform(data))
        elif feature in self.dictVectorizers:
            vectorizer = self.dictVectorizers[feature]
            if self.tfidf and not never_tfidf:
                intermediate = vectorizer.transform(data)
                matrices.append(self.tfidfTransformers[feature].transform(intermediate))
            else:
                matrices.append(vectorizer.transform(data))

    return hstack(matrices)
项目:grove    作者:rigetticomputing    | 项目源码 | 文件源码
def to_realimag(z):
    """
    Map a complex hermitian matrix to its real, doubled-up representation.

    For ``Z = Z_r + 1j * Z_i`` the result is ``R(Z)``::

        R(Z) = [ Z_r   Z_i]
               [-Z_i   Z_r]

    The lower-left block is built as ``Z_i.T``, which equals ``-Z_i`` for a
    hermitian ``Z``.  The mapping preserves products::

        R(X)*R(Y) = [ (X_r*Y_r-X_i*Y_i)    (X_r*Y_i + X_i*Y_r)]
                    [-(X_r*Y_i + X_i*Y_r)  (X_r*Y_r-X_i*Y_i)  ]

                  = R(X*Y).

    In particular, ``Z`` is complex positive (semi-)definite iff ``R(Z)`` is real positive
    (semi-)definite.

    :param (qutip.Qobj|scipy.sparse.base.spmatrix) z:  The operator representation matrix.
    :returns: R(Z) the doubled up representation.
    :rtype: scipy.sparse.csr_matrix
    :raises ValueError: if ``z`` is not hermitian.
    """
    if isinstance(z, qt.Qobj):
        z = z.data
    if not is_hermitian(z):  # pragma no coverage
        raise ValueError("Need a hermitian matrix z")

    real_part = z.real
    imag_part = z.imag
    top_row = sphstack([real_part, imag_part])
    bottom_row = sphstack([imag_part.T, real_part])
    return spvstack([top_row, bottom_row]).tocsr().real
项目:corpus-to-graph-ml    作者:CatalystCode    | 项目源码 | 文件源码
def get_bow_and_pos_features(train_samples, test_samples, ngram_range, pos_ngram_range):
    """Stack the ``get_bow_features`` and ``to_pos_bow`` outputs side by side.

    Returns a ``(train_features, test_features)`` pair; in each, the
    ``get_bow_features`` block comes first and the ``to_pos_bow`` block second.
    """
    train_bow, test_bow = get_bow_features(train_samples, test_samples, ngram_range)
    train_pos, test_pos = to_pos_bow(train_samples, test_samples, ngram_range=pos_ngram_range)

    return hstack((train_bow, train_pos)), hstack((test_bow, test_pos))
项目:dask-searchcv    作者:dask    | 项目源码 | 文件源码
def feature_union_concat(Xs, nsamples, weights):
    """Apply weights and concatenate outputs from a FeatureUnion

    Returns ``FIT_FAILURE`` if any output is ``FIT_FAILURE``.  ``None``
    outputs are dropped.  If nothing is left, the result is an
    ``(nsamples, 0)`` array.  Otherwise the outputs are stacked, as CSR
    when any of them is sparse.
    """
    for x in Xs:
        if x is FIT_FAILURE:
            return FIT_FAILURE

    kept = []
    for X, w in zip(Xs, weights):
        if X is None:
            continue
        kept.append(X if w is None else X * w)

    if not kept:
        return np.zeros((nsamples, 0))
    for block_out in kept:
        if sparse.issparse(block_out):
            return sparse.hstack(kept).tocsr()
    return np.hstack(kept)


# Current set_params isn't threadsafe
项目:Kaggler    作者:qqgeogor    | 项目源码 | 文件源码
def predict_raw(self, X):
    """Predict targets for a feature matrix.

    ``self.w`` holds the flattened weights: the first ``(i + 1) * h``
    values form the input-to-hidden matrix and the last ``h + 1`` values
    the hidden-to-output vector.  The extra row in each is the bias.

    Args:
        X (np.array of float): feature matrix for prediction
            (dense or SciPy sparse)

    Returns:
        array of shape (n_samples, 1) with the raw network output
    """
    # b -- bias for the input and h layers
    b = np.ones((X.shape[0], 1))
    w2 = self.w[-(self.h + 1):].reshape(self.h + 1, 1)
    w1 = self.w[:-(self.h + 1)].reshape(self.i + 1, self.h)

    # Make X to have the same number of columns as self.i.
    # Because of the sparse matrix representation, X for prediction can
    # have a different number of columns.
    n_cols = X.shape[1]
    if n_cols > self.i:
        # If X has more columns, cut extra columns.
        X = X[:, :self.i]
    elif n_cols < self.i:
        # If X has less columns, cut the rows of the weight matrix between
        # the input and h layers instead of X itself because the SciPy
        # sparse matrix does not support .set_shape() yet.
        # Built as a list: a Python 3 ``range`` has no ``.append``.
        idx = list(range(n_cols)) + [self.i]  # last row is the bias
        w1 = w1[idx, :]

    stack_inputs = sparse.hstack if sparse.issparse(X) else np.hstack
    hidden = sigm(stack_inputs((X, b)).dot(w1))
    return np.hstack((hidden, b)).dot(w2)
项目:kaggle-allstate-claims-severity    作者:alno    | 项目源码 | 文件源码
def hstack(x):
    """Column-stack the blocks in ``x``.

    Returns a CSR matrix when at least one block is sparse; otherwise the
    blocks are stacked densely with NumPy.
    """
    has_sparse_block = any(map(sp.issparse, x))
    if not has_sparse_block:
        return np.hstack(x)
    return sp.hstack(x, format='csr')
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def weighted_decision_path(self, X):
        """
        Compute the weighted decision path of every estimator in the forest.

        A non-zero entry is the weight the corresponding node carries
        when predictions are made.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Input.

        Returns
        -------
        decision_path : sparse csr matrix, shape = (n_samples, n_total_nodes)
            Per-estimator weighted decision paths, stacked column-wise.

        est_inds : array-like, shape = (n_estimators + 1,)
            Column offsets: weighted_decision_path[:, est_inds[i]: est_inds[i + 1]]
            is the weighted decision path of estimator i.
        """
        X = self._validate_X_predict(X)

        # Running total of node counts, with a leading 0, gives the column
        # boundaries of each estimator inside the stacked matrix.
        node_counts = [est.tree_.node_count for est in self.estimators_]
        est_inds = np.cumsum([0] + node_counts)

        per_estimator = [
            est.weighted_decision_path(X) for est in self.estimators_]
        stacked = sparse.hstack(per_estimator)
        return stacked.tocsr(), est_inds

    # XXX: This is mainly a stripped version of BaseForest.fit
    # from sklearn.forest
项目:indigo    作者:mbdriscoll    | 项目源码 | 文件源码
def visit_HStack(self, node):
        """ HStack( SpMatrices ) => SpMatrix

        After visiting the children, an HStack whose children are all
        SpMatrix instances is replaced by a single SpMatrix holding their
        column-wise concatenation; any other HStack is returned as visited.
        """
        node = self.generic_visit(node)
        if not all(isinstance(child, SpMatrix) for child in node._children):
            return node

        # The merged matrix takes its name and dtype from the first child.
        first = node._children[0]
        name = "{}+".format(first._name)
        dtype = first.dtype
        log.debug('realizing hstack %s', ', '.join(c._name for c in node._children))
        blocks = [child._matrix for child in node._children]
        stacked = spp.hstack(blocks, dtype=dtype)
        return SpMatrix(node._backend, stacked, name=name)
项目:indigo    作者:mbdriscoll    | 项目源码 | 文件源码
def test_HStack(backend, stack, M, N, K, density, alpha, beta):
    """Check the backend HStack operator against a scipy hstack reference.

    Covers the forward product, the adjoint product, the reported shapes
    and the reported dtype.
    """
    b = backend()
    host_blocks = [indigo.util.randM(M, N, density) for _ in range(stack)]
    reference = spp.hstack(host_blocks)

    device_blocks = [b.SpMatrix(block) for block in host_blocks]
    A = b.HStack(device_blocks)

    # forward: y <- beta * y + alpha * A x
    x = b.rand_array((A.shape[1], K))
    y = b.rand_array((A.shape[0], K))
    expected = beta * y.to_host() + (alpha * reference) @ x.to_host()
    A.eval(y, x, alpha=alpha, beta=beta)
    npt.assert_allclose(y.to_host(), expected, rtol=1e-5)

    # adjoint: y <- beta * y + alpha * A^H x
    x = b.rand_array((A.shape[0], K))
    y = b.rand_array((A.shape[1], K))
    expected = beta * y.to_host() + (alpha * reference.H) @ x.to_host()
    A.H.eval(y, x, alpha=alpha, beta=beta)
    npt.assert_allclose(y.to_host(), expected, rtol=1e-5)

    # shape: stacking `stack` blocks of M x N side by side
    assert A.shape == (M, N * stack)
    assert A.H.shape == (N * stack, M)

    # dtype
    assert A.dtype == np.dtype('complex64')
项目:fake_news    作者:bmassman    | 项目源码 | 文件源码
def partial_X(self, **kwargs) -> None:
        """
        Restrict self._X to the columns of the requested feature sets.

        On first use the complete matrix is saved (as CSC) in self._full_X
        and the complete name mapping in self._full_feature_names, so later
        calls always select from the full data.

        Each keyword is a feature-set name (a key of ``feature_map`` below)
        mapped to a truthy/falsy flag. Falsy entries are ignored. A column is
        kept when its feature name starts with the prefix of any requested
        set; kept columns are renumbered from 0 in self.feature_names.

        Raises
        ------
        ValueError
            If a requested feature set is not enabled on this object.
        """
        if self._X is None:
            self._get_values()
        if self._full_X is None:
            self._full_X = csc_matrix(deepcopy(self._X))
            self._full_feature_names = self.feature_names
        # Maps the keyword name of a feature set to its column-name prefix.
        feature_map = {'author': 'auth',
                       'tfidf': 'text',
                       'tags': 'tag',
                       'title': 'title',
                       'domain_endings': 'domain',
                       'word_count': 'word_count',
                       'misspellings': 'misspellings',
                       'grammar_mistakes': 'grammar_mistakes',
                       'lshash': 'lsh',
                       'source_count': 'source_count',
                       'sentiment': 'sent'}
        feature_sets = set()
        for feature, include in kwargs.items():
            if not include:
                continue
            if not getattr(self, feature):
                # The two literals are concatenated, so the first one needs
                # its trailing space.
                raise ValueError('Cannot include feature that was not in '
                                 'original X.')
            feature_sets.add(feature_map[feature])
        # str.startswith accepts a tuple of alternatives (an empty tuple
        # matches nothing, like any() over an empty set).
        prefixes = tuple(feature_sets)
        kept_cols = []
        self.feature_names = {}
        new_col = count()
        for col, feature in self._full_feature_names.items():
            if feature.startswith(prefixes):
                kept_cols.append(col)
                self.feature_names[next(new_col)] = feature
        self._X = hstack([self._full_X.getcol(c) for c in kept_cols])
项目:heamy    作者:rushter    | 项目源码 | 文件源码
def transform(self, X):
        """Build one int8 indicator column per ``(index, threshold)`` group.

        Column ``k`` is 1 where ``X[:, index_k] < threshold_k`` and 0
        elsewhere; the columns are stacked into one sparse int8 matrix.
        """
        indicators = [
            csr_matrix((X[:, idx] < val).reshape((-1, 1))).astype(np.int8)
            for idx, val in self.groups
        ]
        return hstack(indicators, dtype=np.int8)
项目:kaggler-template    作者:jeongyoonlee    | 项目源码 | 文件源码
def generate_feature(train_file, test_file, train_feature_file,
                     test_feature_file, feature_map_file):
    """Build normalized-numeric + one-hot features and save them to disk.

    Reads the training and test CSVs (indexed by ``id``), takes ``loss``
    from the training data as the target, normalizes the numeric columns and
    one-hot encodes the object-typed columns over train and test together,
    then writes a feature map and the two feature files.

    Args:
        train_file: path of the training CSV (must contain ``id`` and
            ``loss`` columns).
        test_file: path of the test CSV (must contain an ``id`` column).
        train_feature_file: output path for the training features.
        test_feature_file: output path for the test features.
        feature_map_file: output path for the tab-separated feature map
            (``index<TAB>name<TAB>q`` per line).
    """
    logging.info('loading raw data')
    trn = pd.read_csv(train_file, index_col='id')
    tst = pd.read_csv(test_file, index_col='id')

    y = trn.loss.values
    n_trn = trn.shape[0]

    trn.drop('loss', axis=1, inplace=True)

    # Compare against the builtin ``object``: the ``np.object`` alias has
    # been removed from NumPy.
    cat_cols = [x for x in trn.columns if trn[x].dtype == object]
    num_cols = [x for x in trn.columns if trn[x].dtype != object]

    logging.info('categorical: {}, numerical: {}'.format(len(cat_cols),
                                                         len(num_cols)))

    # Fit the transformers on train and test stacked together so both share
    # the same scaling and the same one-hot columns.
    df = pd.concat([trn, tst], axis=0)

    logging.info('normalizing numeric features')
    nm = Normalizer()
    # Plain column assignment replaces the removed ``DataFrame.ix`` indexer.
    df[num_cols] = nm.fit_transform(df[num_cols].values)

    logging.info('label encoding categorical variables')
    ohe = OneHotEncoder(min_obs=10)
    X_ohe = ohe.fit_transform(df[cat_cols].values)
    ohe_cols = ['ohe{}'.format(i) for i in range(X_ohe.shape[1])]

    X = sparse.hstack((df[num_cols].values, X_ohe), format='csr')

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(num_cols + ohe_cols):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    # The first n_trn rows are the training samples, the rest the test ones.
    save_data(X[:n_trn], y, train_feature_file)
    save_data(X[n_trn:], None, test_feature_file)
项目:porn_sieve    作者:PornSieve    | 项目源码 | 文件源码
def refit_from_scratch(self):
        """ Create a new model directly from the database, rather
         than rely on the one saved from last time.

        Starts a ThreadedFit in the background, then immediately fits a
        model on the records from ``self.db.yield_some(250)`` and stores
        the fitted encoder, PCA and model on ``self``.
        """
        # In the background fit a much larger random forest.
        self.threaded_fit = ThreadedFit()
        # NOTE(review): __init__ is re-run when the background fit finishes,
        # presumably to reload the freshly saved model -- confirm.
        self.threaded_fit.signal_finished.connect(self.__init__)
        self.threaded_fit.start()

        temp_model = RandomForest(max_features="sqrt", n_jobs=-1)
        temp_enc   = CountVectorizer()
        X = []   # binary matrix the presence of tags
        Z = []   # additional numerical data
        Y = []   # target (to predict) values
        for data in self.db.yield_some(250):
            feedback = data["feedback"]
            tags     = data[  "tags"  ]
            # Only records that have both feedback and tags are usable.
            if feedback and tags:
                Y.append(   feedback   )
                X.append(" ".join(tags))
                Z.append(self.fmt_numerical(data))

        X = temp_enc.fit_transform(X)
        X = hstack((X, coo_matrix(Z)))
        self.allX = X
        # PCA cannot extract more components than min(n_samples, n_features).
        pca = PCA(min(X.shape[0], X.shape[1], 200))
        # toarray() gives a plain ndarray; todense() returns np.matrix, which
        # current scikit-learn estimators reject.
        reduced_X = pca.fit_transform(X.toarray())
        temp_model.fit(reduced_X, Y)

        self.pca   = pca
        self.model = temp_model
        self.enc   = temp_enc
项目:porn_sieve    作者:PornSieve    | 项目源码 | 文件源码
def predict(self, data):
        """ Given a dict of video data, predict how much
         the user will like the video.

        The tags are encoded with ``self.enc``, joined with the numerical
        features from ``self.fmt_numerical``, reduced with ``self.pca`` and
        passed to ``self.model``; the first prediction is returned.
        """
        tags = " ".join(data["tags"])
        tags = self.enc.transform([tags])
        nums = coo_matrix(self.fmt_numerical(data))
        x = hstack((tags, nums))
        # toarray() gives a plain ndarray; todense() returns np.matrix, which
        # current scikit-learn estimators reject.
        x = self.pca.transform(x.toarray())
        return self.model.predict(x)[0]
项目:porn_sieve    作者:PornSieve    | 项目源码 | 文件源码
def run(self):
        """Fit encoder, PCA and model on all rated records and save them.

        Works on a temporary copy of ``default.db``, dumps the three fitted
        objects under ``usr_data/``, removes the copy and finally emits
        ``signal_finished``.
        """
        # making a copy of the database seems to keep
        # the scraper fast since there's not much need
        # for waiting around for the lock.
        print("threaded fit running.")
        shutil.copyfile("default.db", "_temp.db")
        db = Database("_temp.db")
        model = RandomForest(max_features="sqrt", n_jobs=-1)
        enc   = CountVectorizer()
        X = []   # binary matrix the presence of tags
        Z = []   # additional numerical data
        Y = []   # target (to predict) values
        for data in db.yield_rated():
            feedback = data["feedback"]
            tags     = data[  "tags"  ]
            # Only records that have both feedback and tags are usable.
            if feedback and tags:
                Y.append(   feedback   )
                X.append(" ".join(tags))
                Z.append(self.fmt_numerical(data))

        X = enc.fit_transform(X)
        X = hstack((X, coo_matrix(Z)))
        self.allX = X
        # PCA cannot extract more components than min(n_samples, n_features).
        pca = PCA(min(X.shape[0], X.shape[1], 200))
        # toarray() gives a plain ndarray; todense() returns np.matrix, which
        # current scikit-learn estimators reject.
        reduced_X = pca.fit_transform(X.toarray())
        model.fit(reduced_X, Y)

        joblib.dump(enc,   "usr_data/enc.pkl"  )
        joblib.dump(model, "usr_data/model.pkl")
        joblib.dump(pca,   "usr_data/pca.pkl"  )

        # Drop the database reference before deleting its file.
        del db
        os.remove("_temp.db")

        self.signal_finished.emit()
        print("background fitting complete.")
项目:Theano-Deep-learning    作者:GeekLiB    | 项目源码 | 文件源码
def test_hstack_vstack():
    """
    Tests the sparse.hstack and sparse.vstack functions (not the HStack and
    VStack classes they wrap): the stacked result must carry the requested
    dtype, or the upcast of the block dtypes when none is requested.
    """
    # a deliberately weird mix of dtypes to stack
    dtypes = ('complex128', theano.config.floatX)

    blocks = [theano.sparse.csr_matrix(name="%s block" % dtype, dtype=dtype)
              for dtype in dtypes]

    for stack_function in (theano.sparse.vstack, theano.sparse.hstack):
        for to_dtype in (None, ) + dtypes:
            stacked = stack_function(blocks, dtype=to_dtype)

            if to_dtype is not None:
                expected = to_dtype
            else:
                # No explicit dtype: expect the common upcast of all blocks.
                expected = theano.scalar.upcast(
                    *(block.dtype for block in blocks))

            assert stacked.dtype == expected
项目:zhihu-machine-learning-challenge-2017    作者:HouJP    | 项目源码 | 文件源码
def merge_col(features_1, features_2):
        """
        Join two feature matrices that were split by column.
        :param features_1: the left block of feature columns
        :param features_2: the right block of feature columns
        :return: the column-wise concatenation of both blocks
        """
        merged = hstack([features_1, features_2])
        n_rows, n_cols = merged.shape
        message = "merge col done, shape=(%d,%d)" % (n_rows, n_cols)
        LogUtil.log("INFO", message)
        return merged
项目:decoding_challenge_cortana_2016_3rd    作者:kingjr    | 项目源码 | 文件源码
def spatial_inter_hemi_connectivity(src, dist, verbose=None):
    """Connect vertices of one hemisphere to nearby vertices of the other

    Parameters
    ----------
    src : instance of SourceSpaces
        The source space. Must be surface type.
    dist : float
        Maximal Euclidean distance (in m) between a vertex in one hemisphere
        and a vertex in the other for the two to count as neighbors.
    verbose : bool, str, int, or None
        If not None, override default verbose level (see mne.verbose).

    Returns
    -------
    connectivity : sparse COO matrix
        The connectivity matrix describing the spatial graph structure.
        It only holds inter-hemispheric links, so it is typically combined
        (additively) with an existing intra-hemispheric connectivity matrix,
        e.g. one computed using geodesic distances.
    """
    from scipy.spatial.distance import cdist
    src = _ensure_src(src, kind='surf')
    first, second = src[0], src[1]

    # Pairwise distances between the used vertices of the two hemispheres.
    first_pos = first['rr'][first['vertno']]
    second_pos = second['rr'][second['vertno']]
    within_reach = cdist(first_pos, second_pos) <= dist
    cross = sparse.csr_matrix(within_reach, dtype=int)

    # Block layout [[0, cross], [cross.T, 0]]: the diagonal blocks stay
    # empty because no intra-hemispheric links are produced here.
    n_first, n_second = cross.shape
    zero_first = sparse.csr_matrix((n_first, n_first), dtype=int)
    zero_second = sparse.csr_matrix((n_second, n_second), dtype=int)
    upper = sparse.hstack([zero_first, cross])
    lower = sparse.hstack([cross.T, zero_second])
    return sparse.vstack([upper, lower])
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def fit_transform(self, X, y=None, **fit_params):
        """Fit every transformer on X, transform X with each of them and
        concatenate the outputs column-wise.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Input data to be transformed.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.
        """
        runner = Parallel(n_jobs=self.n_jobs)
        result = runner(
            delayed(_fit_transform_one)(trans, name, X, y,
                                        self.transformer_weights, **fit_params)
            for name, trans in self.transformer_list)

        # Each job returns a (transformed output, fitted transformer) pair.
        Xs, transformers = zip(*result)
        self._update_transformer_list(transformers)

        # A single sparse output makes the whole result sparse (CSR).
        if not any(sparse.issparse(f) for f in Xs):
            return np.hstack(Xs)
        return sparse.hstack(Xs).tocsr()
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def decision_path(self, X):
        """Compute the decision path of the samples through the forest

        Parameters
        ----------
        X : array-like or sparse matrix, shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        indicator : sparse csr array, shape = [n_samples, n_nodes]
            Node indicator matrix whose non zero elements mark the nodes
            each sample goes through.

        n_nodes_ptr : array of size (n_estimators + 1, )
            The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]
            gives the indicator value for the i-th estimator.
        """
        X = self._validate_X_predict(X)
        runner = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                          backend="threading")
        indicators = runner(
            delayed(parallel_helper)(tree, 'decision_path', X,
                                     check_input=False)
            for tree in self.estimators_)

        # Column offsets: a leading 0 followed by the running total of the
        # per-tree node counts.
        widths = [0] + [indicator.shape[1] for indicator in indicators]
        n_nodes_ptr = np.array(widths).cumsum()

        stacked = sparse_hstack(indicators)
        return stacked.tocsr(), n_nodes_ptr