Python scipy.sparse 模块，issparse() 实例源码

我们从Python开源项目中，提取了以下50个代码示例，用于说明如何使用scipy.sparse.issparse()。

项目：SINDy 作者：loiseaujc | 项目源码 | 文件源码

def scipy_sparse_to_cvx_sparse(x):
    '''
    Convert a SciPy sparse matrix into a CVX sparse matrix of the same size.

    Inputs:
    ------
        x : SciPy sparse matrix.

    Outputs:
    -------
        y : CVX sparse matrix holding the same entries and dimensions as x.

    Raises:
    ------
        ValueError : if x is not a SciPy sparse matrix.
    '''

    # --> Check that the input matrix is indeed a scipy sparse matrix.
    if not sparse.issparse(x):
        raise ValueError('Input matrix is not a SciPy sparse matrix.')

    # --> Convert x to COOrdinate format to get (value, row, col) triplets.
    coo = x.tocoo()

    # --> Create the corresponding cvx sparse matrix. The size is passed
    #     explicitly: when it is omitted, CVXOPT's spmatrix derives the
    #     dimensions from the largest stored indices, which silently drops
    #     trailing all-zero rows and columns.
    y = spmatrix(coo.data, coo.row.tolist(), coo.col.tolist(), coo.shape)

    return y

项目：coremltools 作者：gsabran | 项目源码 | 文件源码

def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.

    Scalars and numpy arrays are returned unchanged, sparse matrices are
    densified, and tuples, lists and dicts are sanitized recursively (for
    dicts, both keys and values). Any other type raises an AssertionError
    whose message is ``str(x)``.
    """
    if isinstance(x, (float, str, unicode, int, long)):
        return x
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    elif isinstance(x, tuple):
        # Build a real tuple: a bare generator expression compares by
        # identity and can only be consumed once.
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict((_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    else:
        # Raised explicitly so the check is not stripped by ``python -O``.
        raise AssertionError(str(x))

项目：tbp-next-basket 作者：GiulioRossetti | 项目源码 | 文件源码

def _return_float_dtype(X, Y):
    """
    1. If dtype of X and Y is float32, then dtype float32 is returned.
    2. Else dtype float is returned.

    Inputs that are neither sparse nor ndarrays are converted with
    ``np.asarray``. When Y is None, the dtype of X is used for both sides.

    Returns the (possibly converted) X and Y together with the chosen dtype.
    """
    if not issparse(X) and not isinstance(X, np.ndarray):
        X = np.asarray(X)

    if Y is None:
        Y_dtype = X.dtype
    else:
        if not issparse(Y) and not isinstance(Y, np.ndarray):
            Y = np.asarray(Y)
        Y_dtype = Y.dtype

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy, so the builtin is used directly.
    if X.dtype == Y_dtype == np.float32:
        dtype = np.float32
    else:
        dtype = float

    return X, Y, dtype

项目：AutoML-Challenge 作者：postech-mlg-exbrain | 项目源码 | 文件源码

def __call__(self, X, y, categorical=None, metafeatures=None, helpers=None):
        """Compute this metafeature on (X, y) and wrap the timed result.

        When ``categorical`` is omitted, every column of X is flagged as
        non-categorical. Sparse input is routed to ``_calculate_sparse`` when
        the instance defines it, otherwise ``_calculate`` is used. A
        MemoryError during the computation is not propagated: the value
        becomes None and the comment "Memory Error".
        """
        if categorical is None:
            categorical = [False] * X.shape[1]

        comment = ""
        started = time()
        try:
            use_sparse = issparse(X) and hasattr(self, "_calculate_sparse")
            compute = self._calculate_sparse if use_sparse else self._calculate
            value = compute(X, y, categorical, metafeatures, helpers)
        except MemoryError:
            value, comment = None, "Memory Error"
        elapsed = time() - started

        return MetaFeatureValue(self.__class__.__name__, self.type_,
                                0, 0, value, elapsed, comment=comment)

项目：AutoML-Challenge 作者：postech-mlg-exbrain | 项目源码 | 文件源码

def transform(self, X):
        """Scaling features of X according to feature_range.

        Each feature is multiplied by ``scale_`` and shifted by ``min_``.
        For sparse (CSC) input only the stored entries of each column are
        touched.

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            Input data that will be transformed.
        """
        check_is_fitted(self, 'scale_')

        X = check_array(X, accept_sparse="csc", copy=self.copy,
                        dtype=np.float32)

        if not sparse.issparse(X):
            X *= self.scale_
            X += self.min_
            return X

        # CSC layout: column `col` owns data[indptr[col]:indptr[col + 1]].
        for col in range(X.shape[1]):
            start, stop = X.indptr[col], X.indptr[col + 1]
            X.data[start:stop] *= self.scale_[col]
            X.data[start:stop] += self.min_[col]
        return X

项目：AutoML-Challenge 作者：postech-mlg-exbrain | 项目源码 | 文件源码

def __str__(self):
        """Render the manager's name, info entries, data subsets and feat_type.

        For every data subset the type, shape and dtype are listed; sparse
        subsets additionally report their density (stored entries divided by
        rows times columns).
        """
        pieces = ['DataManager : ' + self.name + '\ninfo:\n']
        for key in self.info:
            pieces.append('\t' + key + ' = ' + str(self.info[key]) + '\n')
        pieces.append('data:\n')

        for subset in self.data:
            arr = self.data[subset]
            pieces.append('\t%s = %s %s %s\n' % (subset, type(arr),
                                                 str(arr.shape),
                                                 str(arr.dtype)))
            if issparse(arr):
                density = float(len(arr.data)) / arr.shape[0] / arr.shape[1]
                pieces.append('\tdensity: %f\n' % density)
        pieces.append('feat_type:\t' + str(self.feat_type) + '\n')
        return ''.join(pieces)

项目：rTensor 作者：erichson | 项目源码 | 文件源码

def nvecs(X, n, rank, do_flipsign=True, dtype=float):
    """
    Eigendecomposition of mode-n unfolding of a tensor

    The mode-n unfolding Xn is formed and the leading ``rank`` eigenvectors
    of ``Xn Xn^T`` are returned, ordered by decreasing eigenvalue.

    Parameters
    ----------
    X : tensor object providing ``unfold(n)``
    n : mode along which to unfold
    rank : number of eigenvectors to compute
    do_flipsign : if True, pass the eigenvectors through ``flipsign``
    dtype : dtype used when the unfolding is converted to CSR (sparse case)

    Returns
    -------
    U : array whose columns are the eigenvectors
    """
    Xn = X.unfold(n)
    if issparse_mat(Xn):
        Xn = csr_matrix(Xn, dtype=dtype)
        Y = Xn.dot(Xn.T)
        _, U = eigsh(Y, rank, which='LM')
    else:
        Y = Xn.dot(Xn.T)
        N = Y.shape[0]
        # Request only the `rank` largest eigenpairs (indices N-rank .. N-1 in
        # ascending order). `subset_by_index` replaces the `eigvals` keyword,
        # which was deprecated in SciPy 1.5 and later removed.
        _, U = eigh(Y, subset_by_index=[N - rank, N - 1])
    # reverse order of eigenvectors such that eigenvalues are decreasing
    U = array(U[:, ::-1])
    # flip sign
    if do_flipsign:
        U = flipsign(U)
    return U

项目：gcForest 作者：kingfengji | 项目源码 | 文件源码

def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    Fit an ExtraTrees classifier and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.ensemble import ExtraTreesClassifier

    def _as_2d(data):
        # Sparse matrices are left untouched; dense arrays get one row per sample.
        if issparse(data):
            return data
        return data.reshape((data.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    start_msg = 'start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'
    LOGGER.info(start_msg.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

项目：gcForest 作者：kingfengji | 项目源码 | 文件源码

def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Fit a RandomForest classifier and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.ensemble import RandomForestClassifier

    def _as_2d(data):
        # Sparse matrices are left untouched; dense arrays get one row per sample.
        if issparse(data):
            return data
        return data.reshape((data.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    start_msg = 'start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'
    LOGGER.info(start_msg.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

项目：gcForest 作者：kingfengji | 项目源码 | 文件源码

def prec_log(X_train, y_train, X_test, y_test):
    """
    Fit a logistic regression (SAG solver) and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.linear_model import LogisticRegression
    # Only dense inputs are reshaped. The former second, unconditional
    # reshape was a no-op for dense data and bypassed this sparse guard.
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred

项目：coremltools 作者：apple | 项目源码 | 文件源码

def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.

    Strings, integers, floats and numpy arrays are returned unchanged,
    sparse matrices are densified, and tuples, lists and dicts are sanitized
    recursively (for dicts, both keys and values). Any other type raises an
    AssertionError whose message is ``str(x)``.
    """
    if isinstance(x, _six.string_types + _six.integer_types + (float,)):
        return x
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    elif isinstance(x, tuple):
        # Build a real tuple: a bare generator expression compares by
        # identity and can only be consumed once.
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict((_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    else:
        # Raised explicitly so the check is not stripped by ``python -O``.
        raise AssertionError(str(x))

项目：polylearn 作者：scikit-learn-contrib | 项目源码 | 文件源码

def test_safe_power_sparse():
    """safe_power on sparse input must stay sparse and agree with dense X ** 4."""
    # TODO maybe move to a util module or something
    # scikit-learn has safe_sqr but not general power

    expected = X ** 4
    current = sp.csr_matrix(X)
    # lil is left out: not working with it for now
    for fmt in ('csr', 'csc', 'coo'):
        # each conversion starts from the matrix produced by the previous one
        current = current.asformat(fmt)
        powered = safe_power(current, degree=4)
        # the result has to stay sparse
        sparsity_msg = "safe_power breaks {} sparsity".format(fmt)
        assert_true(sp.issparse(powered), msg=sparsity_msg)
        mismatch_msg = "safe_power differs for {} and dense".format(fmt)
        assert_array_almost_equal(expected, powered.A, err_msg=mismatch_msg)

项目：vsmlib 作者：undertherain | 项目源码 | 文件源码

def filter_rows(self, ids_of_interest):
        """Return the selected rows of ``self.matrix`` as one dense 2-D array.

        Negative ids are skipped. Rows taken from a sparse matrix are
        densified first. The result has one row per kept id and
        ``self.matrix.shape[1]`` columns; it is an empty ``(0, n_columns)``
        array when no id is kept.
        """
        xdim = self.matrix.shape[1]
        # Seeding with an empty float block keeps the dtype promotion and the
        # (0, xdim) shape of the empty result that stacking onto np.empty gave.
        rows = [np.empty([0, xdim])]
        for i in ids_of_interest:
            if i < 0:
                continue
            if sparse.issparse(self.matrix):
                row = self.matrix[i].todense()
            else:
                row = self.matrix[i]
            rows.append(np.reshape(np.asarray(row), (xdim)))
        # Stack once at the end instead of re-copying the result on every
        # iteration.
        return np.vstack(rows)

项目：kaggle-allstate-claims-severity 作者：alno | 项目源码 | 文件源码

def batch_generator(X, y=None, batch_size=128, shuffle=False):
    """Yield mini-batches of X (and y, when given) forever.

    Each pass walks over all rows in chunks of ``batch_size``; with
    ``shuffle`` the row order is re-shuffled in place at the start of every
    pass. Sparse batches are converted to dense arrays before being yielded.
    """
    order = np.arange(X.shape[0])

    while True:
        if shuffle:
            np.random.shuffle(order)

        offset = 0
        while offset < X.shape[0]:
            rows = order[offset:offset + batch_size]
            offset += batch_size

            chunk = X[rows, :]
            if sp.issparse(chunk):
                chunk = chunk.toarray()

            if y is not None:
                yield chunk, y[rows]
            else:
                yield chunk

项目：scikit-garden 作者：scikit-garden | 项目源码 | 文件源码

def _validate_X_predict(self, X, check_input):
        """Validate X whenever one tries to predict, apply, predict_proba

        Raises NotFittedError when ``tree_`` is None, and ValueError when a
        checked sparse input has non-``np.intc`` indices or when the number
        of columns differs from ``n_features_``. Returns the (possibly
        converted) X.
        """
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X):
                index_dtypes = (X.indices.dtype, X.indptr.dtype)
                if any(dt != np.intc for dt in index_dtypes):
                    raise ValueError("No support for np.int64 index based "
                                     "sparse matrices")

        n_features = X.shape[1]
        expected = self.n_features_
        if expected != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s "
                             % (expected, n_features))

        return X

项目：mlens 作者：flennerhag | 项目源码 | 文件源码

def indexable(*iterables):
    """Make arrays indexable for cross-validation.
    Checks consistent length, passes through None, and ensures that everything
    can be indexed by converting sparse matrices to csr and converting
    non-iterable objects to arrays.
    Parameters
    ----------
    *iterables : lists, dataframes, arrays, sparse matrices
        List of objects to ensure sliceability.
    """
    def _prepare(obj):
        # Sparse matrices become CSR so they support row indexing.
        if sp.issparse(obj):
            return obj.tocsr()
        # Already indexable objects and None are passed through untouched.
        if hasattr(obj, "__getitem__") or hasattr(obj, "iloc") or obj is None:
            return obj
        return np.array(obj)

    result = [_prepare(X) for X in iterables]
    check_consistent_length(*result)
    return result

项目：mlens 作者：flennerhag | 项目源码 | 文件源码

def update(self):
        """Updated output array and shift to input if stacked.

        Nothing happens when there is no output array. Sparse output whose
        class name does not start with ``csr`` is converted to ``csr``
        format. If stacking is in force, the output array replaces the input
        array and ``rebase`` is called so it serves as input for subsequent
        jobs.
        """
        out = self.predict_out
        if out is None:
            return

        if issparse(out):
            already_csr = out.__class__.__name__.startswith('csr')
            if not already_csr:
                # Enforce csr on sparse matrices
                self.predict_out = out.tocsr()

        if self.stack:
            self.predict_in = self.predict_out
            self.rebase()

项目：mlens 作者：flennerhag | 项目源码 | 文件源码

def _propagate_features(self, task):
        """Propagate features from input array to output array.

        The columns ``task.propagate_features`` of the job's input are copied
        into the first ``task.n_feature_prop`` columns of the job's output.
        Dense output is modified in place; for sparse input a new LIL matrix
        replaces ``self.job.predict_out``.
        """
        p_out = self.job.predict_out
        p_in = self.job.predict_in

        # Check for loss of obs between layers (i.e. with blendindex):
        # the first `r` input rows have no counterpart in the output.
        r = int(p_in.shape[0] - p_out.shape[0])

        if issparse(p_in):
            # Need to populate propagated features using scipy sparse hstack
            propagated = p_in[r:, task.propagate_features]
            remainder = p_out[:, task.n_feature_prop:]
            self.job.predict_out = hstack([propagated, remainder]).tolil()
            return

        # Simple item setting
        p_out[:, :task.n_feature_prop] = p_in[r:, task.propagate_features]

项目：coordinates 作者：markovmodel | 项目源码 | 文件源码

def ensure_ndarray_or_sparse(A, shape=None, uniform=None, ndim=None, size=None, dtype=None, kind=None):
    r""" Ensures A is an ndarray or a scipy sparse matrix and does an assert_array with the given parameters

    Parameters
    ----------
    A : object
        ndarray, scipy sparse matrix, or anything ``np.array`` can convert.
    shape, uniform, ndim, size, dtype, kind
        Forwarded unchanged to ``assert_array``.

    Returns
    -------
    A : ndarray or scipy sparse matrix
        If A is already an ndarray or a sparse matrix, it is just returned.
        Otherwise this is an independent copy as an ndarray.

    Raises
    ------
    AssertionError
        If A cannot be converted to an ndarray.

    """
    if not isinstance(A, np.ndarray) and not scisp.issparse(A):
        try:
            A = np.array(A)
        except Exception:
            # Only ordinary conversion failures are translated; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            raise AssertionError('Given argument cannot be converted to an ndarray:\n'+str(A))
    assert_array(A, shape=shape, uniform=uniform, ndim=ndim, size=size, dtype=dtype, kind=kind)
    return A

项目：ip-nonlinear-solver 作者：antonior92 | 项目源码 | 文件源码

def _concatenate_dense_jac(jac_list):
    """Stack a sequence of (J_ineq, J_eq) Jacobian pairs into two dense arrays.

    Sparse Jacobians are densified and everything else is promoted to at
    least 2-D before the inequality parts and the equality parts are each
    stacked vertically. Returns ``(J_ineq, J_eq)``.
    """
    def _dense(jac):
        # Convert a single Jacobian to a 2-D numpy array.
        return jac.toarray() if spc.issparse(jac) else np.atleast_2d(jac)

    ineq_blocks, eq_blocks = [], []
    # Read sequentially all jacobians.
    for ineq_part, eq_part in jac_list:
        ineq_blocks.append(_dense(ineq_part))
        eq_blocks.append(_dense(eq_part))

    # Concatenate all
    return np.vstack(ineq_blocks), np.vstack(eq_blocks)

项目：ip-nonlinear-solver 作者：antonior92 | 项目源码 | 文件源码

def _compute_jacobian(self, J_eq, J_ineq, s):
        """Assemble the Jacobian from its equality and inequality parts.

        Without inequality constraints ``J_eq`` is returned as is. If either
        part is sparse, both are converted to CSR and handed to
        ``_assemble_sparse_jacobian``. Otherwise the dense block matrix
        ``[[J_eq, 0], [J_ineq, diag(s)]]`` is returned as an ndarray.
        """
        if self.n_ineq == 0:
            return J_eq

        if spc.issparse(J_eq) or spc.issparse(J_ineq):
            # It is expected that J_eq and J_ineq
            # are already `csr_matrix` because of
            # the way ``BoxConstraint``, ``NonlinearConstraint``
            # and ``LinearConstraint`` are defined.
            J_eq = spc.csr_matrix(J_eq)
            J_ineq = spc.csr_matrix(J_ineq)
            return self._assemble_sparse_jacobian(J_eq, J_ineq, s)

        # Dense case: neither part is sparse here, so no conversion is
        # needed (the former issparse checks in this branch were unreachable).
        S = np.diag(s)
        zeros = np.zeros((self.n_eq, self.n_ineq))
        # Concatenate matrices
        return np.asarray(np.bmat([[J_eq, zeros],
                                   [J_ineq, S]]))

项目：SINDy 作者：loiseaujc | 项目源码 | 文件源码

def scipy_sparse_to_cvx_sparse(x):
    '''
    Convert a SciPy sparse matrix into a CVX sparse matrix of the same size.

    Inputs:
    ------
        x : SciPy sparse matrix.

    Outputs:
    -------
        y : CVX sparse matrix holding the same entries and dimensions as x.

    Raises:
    ------
        ValueError : if x is not a SciPy sparse matrix.
    '''

    # --> Check that the input matrix is indeed a scipy sparse matrix.
    if not sparse.issparse(x):
        raise ValueError('Input matrix is not a SciPy sparse matrix.')

    # --> Convert x to COOrdinate format to get (value, row, col) triplets.
    coo = x.tocoo()

    # --> Create the corresponding cvx sparse matrix. The size is passed
    #     explicitly: when it is omitted, CVXOPT's spmatrix derives the
    #     dimensions from the largest stored indices, which silently drops
    #     trailing all-zero rows and columns.
    y = spmatrix(coo.data, coo.row.tolist(), coo.col.tolist(), coo.shape)

    return y

项目：muffnn 作者：civisanalytics | 项目源码 | 文件源码

def _make_feed_dict(self, X, inverse=False, training=False):
        """Make the dictionary mapping tensor placeholders to input data.

        Sparse X is converted to a dense array first. X feeds the encoded
        values placeholder when ``inverse`` is set and the input values
        placeholder otherwise. The mask arrays are cut down to the first
        ``X.shape[0]`` rows.
        """
        # Convert sparse inputs to dense.
        if sp.issparse(X):
            X = X.todense().A

        n_rows = X.shape[0]
        data_placeholder = self._encoded_values if inverse else self._input_values

        feed_dict = {
            data_placeholder: X,
            # If not training, turn off dropout (i.e., set keep_prob = 1.0).
            self._keep_prob: self.keep_prob if training else 1.0,
            self._sigmoid_msk: self._sigmoid_msk_values[0:n_rows, :],
            self._default_msk: self._default_msk_values[0:n_rows, :],
            self._softmax_msks: self._softmax_msks_values[:, 0:n_rows, :],
        }
        return feed_dict

项目：mle_rev 作者：trendelkampschroer | 项目源码 | 文件源码

def mydot(A, B):
    r"""Dot-product that can handle dense and sparse arrays

    Parameters
    ----------
    A : numpy ndarray or scipy sparse matrix
        The first factor
    B : numpy ndarray or scipy sparse matrix
        The second factor

    Returns
    -------
    C : numpy ndarray or scipy sparse matrix
        The dot-product of A and B

    """
    if issparse(A):
        product = A.dot(B)
    elif issparse(B):
        # A B = (B^T A^T)^T, so the sparse factor performs the product.
        transposed = B.T.dot(A.T)
        product = transposed.T
    else:
        product = np.dot(A, B)
    return product

项目：mle_rev 作者：trendelkampschroer | 项目源码 | 文件源码

def convert_solution(z, Cs):
    """Turn the stacked vector z = (x, y) into the pair (pi, P).

    With M = Cs.shape[0], the first M entries of z are x and the remaining
    ones are y. ``pi`` is ``exp(y)`` normalized to sum to one. ``P`` has
    off-diagonal entries ``Cs[i, j] * pi[j] / (pi[i] * x[j] + pi[j] * x[i])``
    and a diagonal chosen so that every row sums to one. A sparse Cs is
    densified first.
    """
    if issparse(Cs):
        Cs = Cs.toarray()
    n_states = Cs.shape[0]
    x, y = z[0:n_states], z[n_states:]

    weights = np.exp(y)
    pi = weights / weights.sum()

    outer = pi[:, np.newaxis] * x[np.newaxis, :]
    denom = outer + np.transpose(outer)
    numer = Cs * np.transpose(pi)
    P = numer / denom

    # Replace the diagonal by whatever makes each row sum to one.
    diagonal = np.diag_indices(Cs.shape[0])
    P[diagonal] = 0.0
    P[diagonal] = 1.0 - P.sum(axis=1)
    return pi, P

###############################################################################
# Objective, Gradient, and Hessian
###############################################################################

项目：mle_rev 作者：trendelkampschroer | 项目源码 | 文件源码

def mydot(A, B):
    r"""Dot-product that can handle dense and sparse arrays

    Parameters
    ----------
    A : numpy ndarray or scipy sparse matrix
        The first factor
    B : numpy ndarray or scipy sparse matrix
        The second factor

    Returns
    -------
    C : numpy ndarray or scipy sparse matrix
        The dot-product of A and B

    """
    if issparse(A):
        product = A.dot(B)
    elif issparse(B):
        # A B = (B^T A^T)^T, so the sparse factor performs the product.
        transposed = B.T.dot(A.T)
        product = transposed.T
    else:
        product = np.dot(A, B)
    return product

项目：scanpy 作者：theislab | 项目源码 | 文件源码

def get_knn_edges_sparse(dmat, k):
    """Collect edges to the smallest stored entries of each row of `dmat`.

    Dense input is delegated to `get_knn_edges`. For sparse input each row
    is scanned `k - 1` times: the smallest stored value is located, recorded
    as an edge unless it lies on the diagonal, and temporarily overwritten
    with `inf` so that the next scan finds the next-smallest one. The
    overwritten values are written back before the next row is processed.

    Returns the keys of the edge dictionary, i.e. `(i, j)` pairs with i < j.

    NOTE(review): `row.data.argmin()` presumably fails on a row without
    stored entries - confirm that callers never pass such rows.
    """
    edge_dict = {}
    if not issparse(dmat):
        return get_knn_edges(dmat,k)
    else:
        for i in range(dmat.shape[0]):
            l=1
            # original values of the entries masked with inf in this row
            saved_values={}
            while l<k:
                # re-read the row so earlier inf overwrites are visible
                row = dmat.getrow(i)
                data_index=row.data.argmin()
                j=row.indices[data_index]
                saved_values[j] = dmat[i, j]
                # a diagonal hit uses up one scan but adds no edge
                if i != j:
                    # store the pair in sorted order so (i, j) and (j, i) share a key
                    ii, jj = tuple(sorted([i, j]))
                    edge_dict[(ii, jj)] = dmat[i, j]
                # mask the entry so the next scan picks the next-smallest value
                dmat[i, j] = inf
                l = l + 1
            # Rewrite saved values:
            for j in saved_values:
                dmat[i, j] = saved_values[j]
    return edge_dict.keys()

项目：scanpy 作者：theislab | 项目源码 | 文件源码

def log1p(data, copy=False):
    """Logarithmize the data matrix.

    Computes `X = log(X + 1)`, where `log` denotes the natural logarithm.

    Parameters
    ----------
    data : array-like or AnnData
        The data matrix.
    copy : bool (default: False)
        If an AnnData is passed, determines whether a copy is returned.

    Returns
    -------
    For an AnnData: the transformed copy if `copy`, otherwise None after
    updating `data.X`. For a data matrix: the transformed matrix.
    """
    if isinstance(data, AnnData):
        if copy:
            adata = data.copy()
            adata.X = log1p(data.X)
            return adata
        data.X = log1p(data.X)
        return None

    # proceed with data matrix
    if issparse(data):
        return data.log1p()
    return np.log1p(data)

项目：scanpy 作者：theislab | 项目源码 | 文件源码

def _get_mean_var(X):
    """Return the per-column mean and unbiased variance of X.

    Works for dense arrays and sparse matrices. The variance is computed as
    ``E[X^2] - E[X]^2`` and rescaled by ``n / (n - 1)``, with n the number
    of rows.
    """
    # The moments are computed directly because using
    # sklearn.StandardScaler throws an error related to int to long trafo
    # for very large matrices; the unreachable StandardScaler branch that
    # used to sit under ``if True: ... else:`` has been dropped.
    mean = X.mean(axis=0)
    if issparse(X):
        mean_sq = X.multiply(X).mean(axis=0)
        # flatten the matrix-valued results of the sparse reductions
        mean = mean.A1
        mean_sq = mean_sq.A1
    else:
        mean_sq = np.multiply(X, X).mean(axis=0)
    # enforce R convention (unbiased estimator) for variance
    var = (mean_sq - mean**2) * (X.shape[0]/(X.shape[0]-1))
    return mean, var

项目：scanpy 作者：theislab | 项目源码 | 文件源码

def _scale(X, zero_center=True):
    """Scale the columns of X in place to unit (unbiased) variance.

    Parameters
    ----------
    X : dense array or sparse matrix, modified in place.
    zero_center : bool (default: True)
        If True, the column means are subtracted before scaling. Not
        supported for sparse input.

    Raises
    ------
    ValueError
        If X is sparse and `zero_center` is True.
    """
    # Fail before any moments are computed.
    if issparse(X) and zero_center:
        raise ValueError('Cannot zero-center sparse matrix.')
    # The moments are computed directly because using
    # sklearn.StandardScaler throws an error related to int to long trafo
    # for very large matrices; the unreachable StandardScaler branch that
    # used to sit under ``if True: ... else:`` has been dropped.
    mean, var = _get_mean_var(X)
    scale = np.sqrt(var)
    if issparse(X):
        sparsefuncs.inplace_column_scale(X, 1/scale)
    else:
        # Honour zero_center for dense input as well; previously the mean
        # was subtracted unconditionally.
        if zero_center:
            X -= mean
        X /= scale

项目：gcforest 作者：w821881341 | 项目源码 | 文件源码

def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    Fit an ExtraTrees classifier and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.ensemble import ExtraTreesClassifier

    def _as_2d(data):
        # Sparse matrices are left untouched; dense arrays get one row per sample.
        if issparse(data):
            return data
        return data.reshape((data.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    start_msg = 'start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'
    LOGGER.info(start_msg.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

项目：gcforest 作者：w821881341 | 项目源码 | 文件源码

def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Fit a RandomForest classifier and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.ensemble import RandomForestClassifier

    def _as_2d(data):
        # Sparse matrices are left untouched; dense arrays get one row per sample.
        if issparse(data):
            return data
        return data.reshape((data.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    start_msg = 'start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'
    LOGGER.info(start_msg.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred

项目：gcforest 作者：w821881341 | 项目源码 | 文件源码

def prec_log(X_train, y_train, X_test, y_test):
    """
    Fit a logistic regression (SAG solver) and log its accuracy on the test data.

    Dense inputs are flattened to 2-D (one row per sample); sparse inputs
    are used as they are. Returns the fitted classifier and its predictions
    for X_test.
    """
    from sklearn.linear_model import LogisticRegression
    # Only dense inputs are reshaped. The former second, unconditional
    # reshape was a no-op for dense data and bypassed this sparse guard.
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred

项目：RandomForestClustering 作者：joshloyal | 项目源码 | 文件源码

def fit_transform(self, X, y=None, sample_weight=None):
        """Fit the forest on a dataset generated from X and return the embedding of X.

        The forest is fitted on the output of
        ``generate_discriminative_dataset(X)``. The result is ``self.apply(X)``,
        passed through a freshly fitted one-hot encoder when
        ``self.sparse_output`` is set.
        """
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

        if sp.issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()

        X_, y_ = generate_discriminative_dataset(X)

        parent = super(RandomForestEmbedding, self)
        parent.fit(X_, y_, sample_weight=sample_weight)

        self.one_hot_encoder_ = OneHotEncoder(sparse=True)
        if not self.sparse_output:
            return self.apply(X)
        return self.one_hot_encoder_.fit_transform(self.apply(X))

项目：loompy 作者：linnarsson-lab | 项目源码 | 文件源码

def normalize_attr_array(a: Any) -> np.ndarray:
    """
    Take all kinds of array-like inputs and normalize to a np.ndarray

    A plain ndarray is returned unchanged. A numpy matrix with a single row
    or column is flattened to one dimension, lists and tuples are converted
    with ``np.array``, and sparse matrices are densified and then treated
    like a numpy matrix. Anything else raises ValueError, as does a matrix
    with more than one row and more than one column.
    """
    kind = type(a)
    if kind is np.ndarray:
        return a
    if kind is np.matrix:
        n_rows, n_cols = a.shape[0], a.shape[1]
        if n_rows == 1:
            return np.array(a)[0, :]
        if n_cols == 1:
            return np.array(a)[:, 0]
        raise ValueError("Attribute values must be 1-dimensional.")
    if kind is list or kind is tuple:
        return np.array(a)
    if sparse.issparse(a):
        return normalize_attr_array(a.todense())
    raise ValueError("Argument must be a list, tuple, numpy matrix, numpy ndarray or sparse matrix.")

项目：spherecluster 作者：clara-labs | 项目源码 | 文件源码

def _check_fit_data(self, X):
        """Verify that there are at least n_clusters samples and that every row has unit l2-norm"""
        X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
        n_samples, n_features = X.shape
        if X.shape[0] < self.n_clusters:
            raise ValueError("n_samples=%d should be >= n_clusters=%d" % (
                X.shape[0], self.n_clusters))

        for ee in range(n_samples):
            # sparse rows need the sparse norm routine
            if sp.issparse(X):
                row_norm = sp.linalg.norm(X[ee, :])
            else:
                row_norm = np.linalg.norm(X[ee, :])

            deviation = np.abs(row_norm - 1.)
            if deviation > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(row_norm))

        return X

项目：spherecluster 作者：clara-labs | 项目源码 | 文件源码

def _check_test_data(self, X):
        """Verify that X has as many features as the cluster centers and that every row has unit l2-norm"""
        X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES,
                        warn_on_dtype=True)
        n_samples, n_features = X.shape
        expected_n_features = self.cluster_centers_.shape[1]
        if n_features != expected_n_features:
            raise ValueError("Incorrect number of features. "
                             "Got %d features, expected %d" % (
                                 n_features, expected_n_features))

        for ee in range(n_samples):
            # sparse rows need the sparse norm routine
            if sp.issparse(X):
                row_norm = sp.linalg.norm(X[ee, :])
            else:
                row_norm = np.linalg.norm(X[ee, :])

            deviation = np.abs(row_norm - 1.)
            if deviation > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(row_norm))

        return X

项目：ycml 作者：skylander86 | 项目源码 | 文件源码

def keras_fit(self, X, Y, *, nn_model=None, validation_data=None, resume=None, **fit_args):
        """Fit the neural network model on (X, Y), densifying sparse inputs first.

        ``nn_model`` defaults to the attribute named by ``NN_MODEL_ATTRIBUTE``.
        Nothing is fitted (None is returned) when ``_pre_fit_setup`` returns a
        false value. Explicit ``validation_data`` disables the validation
        split; otherwise ``self.validation_size`` is used as the split.
        Returns whatever ``nn_model.fit`` returns.
        """
        if nn_model is None:
            nn_model = getattr(self, self.NN_MODEL_ATTRIBUTE)

        if not self._pre_fit_setup(nn_model, resume=resume, **fit_args):
            return

        def _dense(matrix):
            # Sparse matrices are converted to arrays; anything else passes through.
            return matrix.toarray() if sps.issparse(matrix) else matrix

        X = _dense(X)
        Y = _dense(Y)
        if validation_data is not None:
            X_validation, Y_validation = validation_data
            validation_data = (_dense(X_validation), _dense(Y_validation))
            n_validation = validation_data[1].shape[0]
            validation_split = 0.0
        else:
            n_validation = int(self.validation_size * Y.shape[0])
            validation_split = self.validation_size
        #end if

        logger.info('{} instances used for training and {} instances used for validation.'.format(Y.shape[0], n_validation))

        return nn_model.fit(
            X, Y,
            validation_data=validation_data,
            validation_split=validation_split,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=self.verbose,
            callbacks=self.build_callbacks(),
            initial_epoch=self.initial_epoch,
            **fit_args
        )
    #end def