Python scipy.sparse 模块,issparse() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用scipy.sparse.issparse()

项目:SINDy    作者:loiseaujc    | 项目源码 | 文件源码
def scipy_sparse_to_cvx_sparse(x):
    '''
    Convert a SciPy sparse matrix into a CVX sparse matrix built with
    ``spmatrix`` (presumably ``cvxopt.spmatrix`` -- confirm against the
    file's imports).

    Inputs:
    ------
        x : SciPy sparse matrix.

    Outputs:
    -------
        y : CVX sparse matrix with the same shape and stored entries as x.

    Raises:
    ------
        ValueError : if x is not a SciPy sparse matrix.
    '''

    # --> Check that the input matrix is indeed a scipy sparse matrix.
    if not sparse.issparse(x):
        raise ValueError('Input matrix is not a SciPy sparse matrix.')

    # --> Convert x to COOrdinate format to get (value, row, col) triplets.
    coo = x.tocoo()

    # --> Create the corresponding cvx sparse matrix. The size is passed
    #     explicitly: when it is omitted it is inferred from the largest
    #     row/column index, which silently drops trailing all-zero rows
    #     and columns of x.
    y = spmatrix(coo.data, coo.row.tolist(), coo.col.tolist(), coo.shape)

    return y
项目:coremltools    作者:gsabran    | 项目源码 | 文件源码
def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.

    Scalars (float, str, int and, on Python 2, unicode/long) and NumPy arrays
    are returned unchanged, sparse matrices are densified, and tuples, lists
    and dicts are rebuilt with every element (and dict key) sanitized
    recursively.

    Raises AssertionError (carrying ``str(x)``) for any other type.
    """
    try:
        scalar_types = (float, str, unicode, int, long)
    except NameError:
        # Python 3: ``unicode`` and ``long`` no longer exist as separate types.
        scalar_types = (float, str, int)

    if isinstance(x, scalar_types):
        return x
    # Built-in containers are tested before the sparse / ndarray cases; the
    # categories are disjoint, so the order does not affect the result.
    elif isinstance(x, tuple):
        # Build a real tuple: the generator expression used previously never
        # compares equal to a tuple.
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict((_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    else:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError(str(x))
项目:tbp-next-basket    作者:GiulioRossetti    | 项目源码 | 文件源码
def _return_float_dtype(X, Y):
    """
    Coerce X and Y to arrays and choose the float dtype to work in.

    1. If dtype of X and Y is float32, then dtype float32 is returned.
    2. Else dtype float is returned.

    Inputs that are neither sparse nor ``np.ndarray`` are converted with
    ``np.asarray``. When Y is None it is returned as None and X's dtype is
    used in its place for the comparison.

    Returns the tuple ``(X, Y, dtype)``.
    """
    if not issparse(X) and not isinstance(X, np.ndarray):
        X = np.asarray(X)

    if Y is None:
        Y_dtype = X.dtype
    else:
        if not issparse(Y) and not isinstance(Y, np.ndarray):
            Y = np.asarray(Y)
        Y_dtype = Y.dtype

    if X.dtype == Y_dtype == np.float32:
        dtype = np.float32
    else:
        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from NumPy (1.24); the builtin is equivalent.
        dtype = float

    return X, Y, dtype
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def __call__(self, X, y, categorical=None, metafeatures=None, helpers=None):
        """Compute this meta-feature on (X, y) and return it with its timing.

        ``categorical`` defaults to a list of ``False`` with one entry per
        column of X. Sparse X is routed to ``self._calculate_sparse`` when
        that method exists, everything else to ``self._calculate``. A
        MemoryError during the calculation is turned into a ``None`` value
        with the comment "Memory Error"; other exceptions propagate.

        Returns a ``MetaFeatureValue`` carrying the class name, ``self.type_``,
        the value, the elapsed wall time and the comment.
        """
        if categorical is None:
            categorical = [False] * X.shape[1]

        comment = ""
        started = time()
        try:
            if issparse(X) and hasattr(self, "_calculate_sparse"):
                calculate = self._calculate_sparse
            else:
                calculate = self._calculate
            value = calculate(X, y, categorical, metafeatures, helpers)
        except MemoryError:
            value = None
            comment = "Memory Error"
        elapsed = time() - started

        return MetaFeatureValue(self.__class__.__name__, self.type_,
                                0, 0, value, elapsed, comment=comment)
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def transform(self, X):
        """Scale the features of X with the fitted ``scale_`` and ``min_``.

        Parameters
        ----------
        X : array-like with shape [n_samples, n_features]
            Input data that will be transformed.

        Returns
        -------
        X : the validated (float32, CSC if sparse) data after scaling.
            For sparse input only the stored entries are modified.
        """
        check_is_fitted(self, 'scale_')

        X = check_array(X, accept_sparse="csc", copy=self.copy,
                        dtype=np.float32)

        if not sparse.issparse(X):
            X *= self.scale_
            X += self.min_
            return X

        # In CSC layout the stored values of column ``col`` occupy
        # data[indptr[col]:indptr[col + 1]].
        for col in range(X.shape[1]):
            stored = slice(X.indptr[col], X.indptr[col + 1])
            X.data[stored] *= self.scale_[col]
            X.data[stored] += self.min_[col]
        return X
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def __str__(self):
        """Return a multi-line summary: name, info entries, data subsets.

        Each data subset is listed with its type, shape and dtype; sparse
        subsets additionally report their density (stored entries divided
        by rows times columns). The feature types are printed last.
        """
        parts = ['DataManager : ' + self.name + '\ninfo:\n']
        for key in self.info:
            parts.append('\t' + key + ' = ' + str(self.info[key]) + '\n')
        parts.append('data:\n')

        for subset in self.data:
            values = self.data[subset]
            parts.append('\t%s = %s %s %s\n' % (subset, type(values),
                                                str(values.shape),
                                                str(values.dtype)))
            if issparse(values):
                density = (float(len(values.data))
                           / values.shape[0]
                           / values.shape[1])
                parts.append('\tdensity: %f\n' % density)
        parts.append('feat_type:\t' + str(self.feat_type) + '\n')
        return ''.join(parts)
项目:rTensor    作者:erichson    | 项目源码 | 文件源码
def nvecs(X, n, rank, do_flipsign=True, dtype=float):
    """
    Eigendecomposition of mode-n unfolding of a tensor

    Parameters
    ----------
    X : object exposing ``unfold(n)`` (presumably a tensor class of this
        project -- confirm against the caller).
    n : mode passed to ``X.unfold``.
    rank : number of eigenvectors to keep.
    do_flipsign : if True, the result is passed through ``flipsign``.
    dtype : dtype used when the unfolding is converted to CSR (sparse case
        only). Defaults to the builtin ``float``; the former default
        ``np.float`` was an alias of it and no longer exists in NumPy.

    Returns
    -------
    U : array whose columns are the ``rank`` selected eigenvectors of
        ``Xn * Xn^T``, in reversed solver order.
    """
    Xn = X.unfold(n)
    if issparse_mat(Xn):
        Xn = csr_matrix(Xn, dtype=dtype)
        Y = Xn.dot(Xn.T)
        _, U = eigsh(Y, rank, which='LM')
    else:
        Y = Xn.dot(Xn.T)
        N = Y.shape[0]
        # NOTE(review): newer SciPy spells this ``subset_by_index=[lo, hi]``;
        # the ``eigvals`` keyword is kept for compatibility with the SciPy
        # version this file targets -- confirm before upgrading.
        _, U = eigh(Y, eigvals=(N - rank, N - 1))
    # reverse order of eigenvectors such that eigenvalues are decreasing
    U = array(U[:, ::-1])
    # flip sign
    if do_flipsign:
        U = flipsign(U)
    return U
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    Fit an ExtraTreesClassifier with ``n_trees`` estimators and log the
    fraction of test samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.ensemble import ExtraTreesClassifier

    def _as_2d(X):
        # Only dense arrays are reshaped; sparse matrices pass through.
        return X if issparse(X) else X.reshape((X.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, *shapes))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Fit a RandomForestClassifier with ``n_trees`` estimators and log the
    fraction of test samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.ensemble import RandomForestClassifier

    def _as_2d(X):
        # Only dense arrays are reshaped; sparse matrices pass through.
        return X if issparse(X) else X.reshape((X.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, *shapes))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def prec_log(X_train, y_train, X_test, y_test):
    """
    Fit a LogisticRegression (``sag`` solver) and log the fraction of test
    samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.linear_model import LogisticRegression
    # Only dense arrays are reshaped. The former second, unconditional
    # reshape was a no-op for dense data and bypassed this guard for sparse
    # matrices, so it has been dropped.
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def _sanitize_value(x):
    """
    Performs cleaning steps on the data so various type comparisons can
    be performed correctly.

    Strings, integers, floats and NumPy arrays are returned unchanged,
    sparse matrices are densified, and tuples, lists and dicts are rebuilt
    with every element (and dict key) sanitized recursively.

    Raises AssertionError (carrying ``str(x)``) for any other type.
    """
    # Built-in containers are tested first; they are disjoint from the
    # scalar, sparse and ndarray cases, so the order does not affect the
    # result.
    if isinstance(x, tuple):
        # Build a real tuple: the generator expression used previously never
        # compares equal to a tuple.
        return tuple(_sanitize_value(v) for v in x)
    elif isinstance(x, list):
        return [_sanitize_value(v) for v in x]
    elif isinstance(x, dict):
        return dict((_sanitize_value(k), _sanitize_value(v)) for k, v in x.items())
    elif isinstance(x, _six.string_types + _six.integer_types + (float,)):
        return x
    elif _HAS_SKLEARN and _sp.issparse(x):
        return x.todense()
    elif isinstance(x, _np.ndarray):
        return x
    else:
        # Raised explicitly so the check survives ``python -O``.
        raise AssertionError(str(x))
项目:polylearn    作者:scikit-learn-contrib    | 项目源码 | 文件源码
def test_safe_power_sparse():
    """safe_power keeps sparse inputs sparse and matches the dense power."""
    # TODO maybe move to a util module or something
    # scikit-learn has safe_sqr but not general power

    expected = X ** 4
    X_sp = sp.csr_matrix(X)
    # lil is left out on purpose: not working with lil for now
    for fmt in ('csr', 'csc', 'coo'):
        X_sp = X_sp.asformat(fmt)
        powered = safe_power(X_sp, degree=4)
        # the result must stay sparse
        assert_true(sp.issparse(powered),
                    msg="safe_power breaks {} sparsity".format(fmt))
        assert_array_almost_equal(
            expected,
            powered.A,
            err_msg="safe_power differs for {} and dense".format(fmt))
项目:vsmlib    作者:undertherain    | 项目源码 | 文件源码
def filter_rows(self, ids_of_interest):
        """Return the selected rows of ``self.matrix`` as one dense 2-D array.

        Parameters
        ----------
        ids_of_interest : iterable of row indices; negative ids are skipped.

        Returns
        -------
        ndarray of shape (n_kept, n_columns), rows in the order requested.
        With nothing kept the result is an empty float array of shape
        (0, n_columns).
        """
        xdim = self.matrix.shape[1]
        is_sparse = sparse.issparse(self.matrix)  # invariant over the loop
        # Seed with an empty (0, xdim) float block so the result keeps the
        # same shape and dtype promotion as stacking onto np.empty([0, xdim]).
        rows = [np.empty([0, xdim])]
        for i in ids_of_interest:
            if i < 0:
                continue
            row = self.matrix[i].todense() if is_sparse else self.matrix[i]
            rows.append(np.reshape(np.asarray(row), (xdim)))
        # Stack once at the end instead of re-copying the result per row.
        return np.vstack(rows)
项目:kaggle-allstate-claims-severity    作者:alno    | 项目源码 | 文件源码
def batch_generator(X, y=None, batch_size=128, shuffle=False):
    """Yield mini-batches of X (and y) forever.

    Every pass walks the row index in chunks of ``batch_size``; with
    ``shuffle`` the index is reshuffled in place at the start of each pass.
    Sparse batches are converted to dense arrays. Yields ``X_batch`` when
    ``y`` is None, otherwise ``(X_batch, y_batch)``.
    """
    order = np.arange(X.shape[0])

    while True:  # one full pass over the rows per iteration
        if shuffle:
            np.random.shuffle(order)

        offset = 0
        while offset < X.shape[0]:
            rows = order[offset:offset + batch_size]
            offset += batch_size

            X_batch = X[rows, :]
            if sp.issparse(X_batch):
                X_batch = X_batch.toarray()

            yield X_batch if y is None else (X_batch, y[rows])
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def _validate_X_predict(self, X, check_input):
        """Validate X before predict / apply / predict_proba and return it.

        Raises NotFittedError when ``self.tree_`` is None, and ValueError
        when a sparse X uses non-``np.intc`` indices or when the number of
        columns differs from ``self.n_features_``. X is only passed through
        ``check_array`` when ``check_input`` is true.
        """
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and not (X.indices.dtype == np.intc and
                                    X.indptr.dtype == np.intc):
                raise ValueError(
                    "No support for np.int64 index based sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ == n_features:
            return X

        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def indexable(*iterables):
    """Make arrays indexable for cross-validation.

    Sparse matrices are converted to csr; ``None`` and objects exposing
    ``__getitem__`` or ``iloc`` are passed through unchanged; any other
    (non-iterable) object is wrapped with ``np.array``. The converted
    objects are then checked for consistent length.

    Parameters
    ----------
    *iterables : lists, dataframes, arrays, sparse matrices
        List of objects to ensure sliceability.

    Returns
    -------
    list
        The converted objects, in the order given.
    """
    def _as_indexable(obj):
        if sp.issparse(obj):
            return obj.tocsr()
        if obj is None or hasattr(obj, "__getitem__") or hasattr(obj, "iloc"):
            return obj
        return np.array(obj)

    result = [_as_indexable(X) for X in iterables]
    check_consistent_length(*result)
    return result
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def update(self):
        """Normalise the output array and, when stacking, make it the input.

        Does nothing while ``predict_out`` is None. A sparse output whose
        class name does not start with ``csr`` is converted to ``csr``.
        With ``self.stack`` set, the output array replaces the input array
        for subsequent jobs and ``self.rebase()`` is called.
        """
        out = self.predict_out
        if out is None:
            return

        if issparse(out):
            if not out.__class__.__name__.startswith('csr'):
                # Enforce csr on sparse matrices
                self.predict_out = out.tocsr()

        if self.stack:
            self.predict_in = self.predict_out
            self.rebase()
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def _propagate_features(self, task):
        """Propagate features from input array to output array.

        The columns ``task.propagate_features`` of the job's input array
        fill the first ``task.n_feature_prop`` columns of its output array.
        Dense output is written in place; for sparse input a new LIL matrix
        is assembled and stored back on ``self.job.predict_out``.
        """
        job = self.job
        p_out, p_in = job.predict_out, job.predict_in

        # Check for loss of obs between layers (i.e. with blendindex):
        # skip the leading input rows that have no output counterpart.
        r = int(p_in.shape[0] - p_out.shape[0])
        carried = p_in[r:, task.propagate_features]

        if issparse(p_in):
            # Need to populate propagated features using scipy sparse hstack
            remainder = p_out[:, task.n_feature_prop:]
            job.predict_out = hstack([carried, remainder]).tolil()
        else:
            # Simple item setting
            p_out[:, :task.n_feature_prop] = carried
项目:coordinates    作者:markovmodel    | 项目源码 | 文件源码
def ensure_ndarray_or_sparse(A, shape=None, uniform=None, ndim=None, size=None, dtype=None, kind=None):
    r""" Ensures A is an ndarray or a scipy sparse matrix and does an assert_array with the given parameters

    Parameters
    ----------
    A : object
        ndarray, scipy sparse matrix, or anything ``np.array`` can convert.
    shape, uniform, ndim, size, dtype, kind
        Passed through unchanged to ``assert_array``.

    Returns
    -------
    A : ndarray or scipy sparse matrix
        If A is already an ndarray or a sparse matrix, it is just returned.
        Otherwise this is an independent copy as an ndarray.

    Raises
    ------
    AssertionError
        If A cannot be converted to an ndarray.

    """
    if not isinstance(A, np.ndarray) and not scisp.issparse(A):
        try:
            A = np.array(A)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            raise AssertionError('Given argument cannot be converted to an ndarray:\n'+str(A))
    assert_array(A, shape=shape, uniform=uniform, ndim=ndim, size=size, dtype=dtype, kind=kind)
    return A
项目:ip-nonlinear-solver    作者:antonior92    | 项目源码 | 文件源码
def _concatenate_dense_jac(jac_list):
    """Stack a list of ``(J_ineq, J_eq)`` Jacobian pairs into two dense arrays.

    Sparse blocks are densified with ``toarray``; everything else goes
    through ``np.atleast_2d``. Returns ``(J_ineq, J_eq)``, each the vertical
    stack of the corresponding blocks in list order.
    """
    def _as_dense(J):
        return J.toarray() if spc.issparse(J) else np.atleast_2d(J)

    ineq_blocks, eq_blocks = [], []
    # Read sequentially all jacobians.
    for J_ineq, J_eq in jac_list:
        ineq_blocks.append(_as_dense(J_ineq))
        eq_blocks.append(_as_dense(J_eq))
    # Concatenate all
    return np.vstack(ineq_blocks), np.vstack(eq_blocks)
项目:ip-nonlinear-solver    作者:antonior92    | 项目源码 | 文件源码
def _compute_jacobian(self, J_eq, J_ineq, s):
        """Assemble the Jacobian from the equality and inequality blocks.

        Without inequality constraints (``self.n_ineq == 0``) ``J_eq`` is
        returned as is. If either block is sparse, both are converted to
        ``csr_matrix`` and handed to ``self._assemble_sparse_jacobian``.
        Otherwise the dense block matrix ``[[J_eq, 0], [J_ineq, diag(s)]]``
        is returned as an ndarray.
        """
        if self.n_ineq == 0:
            return J_eq

        if spc.issparse(J_eq) or spc.issparse(J_ineq):
            # It is expected that J_eq and J_ineq
            # are already `csr_matrix` because of
            # the way ``BoxConstraint``, ``NonlinearConstraint``
            # and ``LinearConstraint`` are defined.
            J_eq = spc.csr_matrix(J_eq)
            J_ineq = spc.csr_matrix(J_ineq)
            return self._assemble_sparse_jacobian(J_eq, J_ineq, s)

        # Dense case: neither block can be sparse here, so the former
        # ``toarray`` conversions were unreachable and have been removed.
        S = np.diag(s)
        zeros = np.zeros((self.n_eq, self.n_ineq))
        # Concatenate matrices
        return np.asarray(np.bmat([[J_eq, zeros],
                                   [J_ineq, S]]))
项目:SINDy    作者:loiseaujc    | 项目源码 | 文件源码
def scipy_sparse_to_cvx_sparse(x):
    '''
    Convert a SciPy sparse matrix into a CVX sparse matrix built with
    ``spmatrix`` (presumably ``cvxopt.spmatrix`` -- confirm against the
    file's imports).

    Inputs:
    ------
        x : SciPy sparse matrix.

    Outputs:
    -------
        y : CVX sparse matrix with the same shape and stored entries as x.

    Raises:
    ------
        ValueError : if x is not a SciPy sparse matrix.
    '''

    # --> Check that the input matrix is indeed a scipy sparse matrix.
    if not sparse.issparse(x):
        raise ValueError('Input matrix is not a SciPy sparse matrix.')

    # --> Convert x to COOrdinate format to get (value, row, col) triplets.
    coo = x.tocoo()

    # --> Create the corresponding cvx sparse matrix. The size is passed
    #     explicitly: when it is omitted it is inferred from the largest
    #     row/column index, which silently drops trailing all-zero rows
    #     and columns of x.
    y = spmatrix(coo.data, coo.row.tolist(), coo.col.tolist(), coo.shape)

    return y
项目:muffnn    作者:civisanalytics    | 项目源码 | 文件源码
def _make_feed_dict(self, X, inverse=False, training=False):
        """Make the dictionary mapping tensor placeholders to input data.

        Parameters
        ----------
        X : dense array or scipy sparse matrix; sparse input is densified.
        inverse : if True, X is fed to ``self._encoded_values`` instead of
            ``self._input_values``.
        training : if False, dropout is turned off (keep probability 1.0);
            otherwise ``self.keep_prob`` is used.

        Returns
        -------
        dict keyed by the placeholders, with the mask values sliced to the
        first ``X.shape[0]`` rows.
        """
        # Convert sparse inputs to dense. ``toarray`` yields the ndarray
        # directly instead of going through ``np.matrix`` (``todense().A``),
        # and also works for sparse array types.
        if sp.issparse(X):
            X = X.toarray()

        n_rows = X.shape[0]
        placeholder = self._encoded_values if inverse else self._input_values
        feed_dict = {placeholder: X}

        # If not training, turn off dropout (i.e., set keep_prob = 1.0).
        feed_dict[self._keep_prob] = self.keep_prob if training else 1.0

        feed_dict[self._sigmoid_msk] = self._sigmoid_msk_values[0:n_rows, :]
        feed_dict[self._default_msk] = self._default_msk_values[0:n_rows, :]
        feed_dict[self._softmax_msks] = self._softmax_msks_values[:, 0:n_rows, :]

        return feed_dict
项目:mle_rev    作者:trendelkampschroer    | 项目源码 | 文件源码
def mydot(A, B):
    r"""Dot-product that can handle dense and sparse arrays

    Parameters
    ----------
    A : numpy ndarray or scipy sparse matrix
        The first factor
    B : numpy ndarray or scipy sparse matrix
        The second factor

    Returns
    -------
    C : numpy ndarray or scipy sparse matrix
        The dot-product of A and B

    """
    if issparse(A):
        return A.dot(B)
    if not issparse(B):
        return np.dot(A, B)
    # Dense A, sparse B: use (B^T A^T)^T = A B so that the sparse operand
    # performs the multiplication.
    product_t = B.T.dot(A.T)
    return product_t.T
项目:mle_rev    作者:trendelkampschroer    | 项目源码 | 文件源码
def convert_solution(z, Cs):
    """Split the stacked vector ``z`` and build ``(pi, P)`` from it.

    With ``M = Cs.shape[0]``, the first M entries of z are ``x`` and the
    rest ``y``. ``pi`` is ``exp(y)`` normalised to sum to one. ``P`` has
    off-diagonal entries ``Cs[i, j] * pi[j] / (pi[i] * x[j] + pi[j] * x[i])``
    and a diagonal chosen so that every row sums to one. Sparse ``Cs`` is
    densified first.
    """
    if issparse(Cs):
        Cs = Cs.toarray()
    M = Cs.shape[0]
    x, y = z[:M], z[M:]

    weights = np.exp(y)
    pi = weights / weights.sum()

    outer = pi[:, np.newaxis] * x[np.newaxis, :]
    P = (Cs * pi.T) / (outer + outer.T)

    # Replace the diagonal by whatever makes each row sum to one.
    diagonal = np.diag_indices(M)
    P[diagonal] = 0.0
    P[diagonal] = 1.0 - P.sum(axis=1)
    return pi, P

###############################################################################
# Objective, Gradient, and Hessian
###############################################################################
项目:mle_rev    作者:trendelkampschroer    | 项目源码 | 文件源码
def mydot(A, B):
    r"""Dot-product that can handle dense and sparse arrays

    Parameters
    ----------
    A : numpy ndarray or scipy sparse matrix
        The first factor
    B : numpy ndarray or scipy sparse matrix
        The second factor

    Returns
    -------
    C : numpy ndarray or scipy sparse matrix
        The dot-product of A and B

    """
    if issparse(A):
        return A.dot(B)
    if not issparse(B):
        return np.dot(A, B)
    # Dense A, sparse B: use (B^T A^T)^T = A B so that the sparse operand
    # performs the multiplication.
    product_t = B.T.dot(A.T)
    return product_t.T
项目:scanpy    作者:theislab    | 项目源码 | 文件源码
def get_knn_edges_sparse(dmat, k):
    """Collect the undirected edges to the ``k - 1`` nearest stored
    neighbours of every row of a sparse distance matrix.

    Parameters
    ----------
    dmat : distance matrix. Dense input is delegated to ``get_knn_edges``.
        For sparse input only explicitly stored entries count as candidate
        neighbours.
    k : a row contributes its ``k - 1`` smallest stored entries (fewer if
        the row stores fewer); ties keep stored order.

    Returns
    -------
    The keys of a dict mapping ``(min(i, j), max(i, j))`` to the distance,
    i.e. each undirected edge once; self-loops are skipped.

    Unlike the previous implementation, ``dmat`` is never written to, so it
    can no longer be left with ``inf`` entries when a row stores fewer than
    ``k - 1`` values, and rows without stored entries are skipped.
    """
    if not issparse(dmat):
        return get_knn_edges(dmat,k)

    import numpy as np  # local import: the block did not rely on ``np`` before

    edge_dict = {}
    n_neighbors = max(k - 1, 0)
    for i in range(dmat.shape[0]):
        row = dmat.getrow(i)
        # A stable sort reproduces the order in which repeated argmin calls
        # would pick equal distances.
        nearest = np.argsort(row.data, kind='mergesort')[:n_neighbors]
        for pos in nearest:
            j = row.indices[pos]
            if i != j:
                edge = (i, j) if i < j else (j, i)
                edge_dict[edge] = row.data[pos]
    return edge_dict.keys()
项目:scanpy    作者:theislab    | 项目源码 | 文件源码
def log1p(data, copy=False):
    """Logarithmize the data matrix.

    Computes `X = log(X + 1)`, where `log` denotes the natural logarithm.

    Parameters
    ----------
    data : array-like or AnnData
        The data matrix.
    copy : bool (default: False)
        If an AnnData is passed, determines whether a copy is returned.

    Returns
    -------
    For an AnnData: the transformed copy if `copy`, otherwise None (its `X`
    is replaced). For a plain matrix: the transformed matrix.
    """
    if isinstance(data, AnnData):
        if not copy:
            data.X = log1p(data.X)
            return None
        adata = data.copy()
        adata.X = log1p(data.X)
        return adata

    # proceed with data matrix
    if issparse(data):
        return data.log1p()
    return np.log1p(data)
项目:scanpy    作者:theislab    | 项目源码 | 文件源码
def _get_mean_var(X):
    """Return the per-column mean and unbiased variance of X.

    Works for dense arrays and scipy sparse matrices; for sparse input the
    results are flattened to 1-D arrays. The variance is computed as
    ``E[x^2] - E[x]^2`` and rescaled by ``n / (n - 1)``.
    """
    # Computed by hand rather than with sklearn's StandardScaler, which
    # - throws an error related to int to long trafo for very large matrices
    # - using X.multiply is slower
    # (the unreachable StandardScaler branch has been removed).
    mean = X.mean(axis=0)
    if issparse(X):
        mean_sq = X.multiply(X).mean(axis=0)
        mean = mean.A1
        mean_sq = mean_sq.A1
    else:
        mean_sq = np.multiply(X, X).mean(axis=0)
    # enforce R convention (unbiased estimator) for variance
    var = (mean_sq - mean**2) * (X.shape[0]/(X.shape[0]-1))
    return mean, var
项目:scanpy    作者:theislab    | 项目源码 | 文件源码
def _scale(X, zero_center=True):
    """Scale the columns of X in place to unit (unbiased) variance.

    Parameters
    ----------
    X : dense array or scipy sparse matrix, modified in place.
    zero_center : if True, the column means are subtracted first. This is
        not possible for sparse input and raises ValueError there.

    Returns None.
    """
    # Done by hand rather than with sklearn's StandardScaler, which
    # - throws an error related to int to long trafo for very large matrices
    # - using X.multiply is slower
    # (the unreachable StandardScaler branch has been removed).
    sparse_input = issparse(X)
    if sparse_input and zero_center:
        # Fail before spending time on the statistics.
        raise ValueError('Cannot zero-center sparse matrix.')

    mean, var = _get_mean_var(X)
    scale = np.sqrt(var)
    if sparse_input:
        sparsefuncs.inplace_column_scale(X, 1/scale)
    else:
        # Honour ``zero_center`` for dense input too; previously the mean
        # was subtracted unconditionally.
        if zero_center:
            X -= mean
        X /= scale
项目:gcforest    作者:w821881341    | 项目源码 | 文件源码
def prec_ets(n_trees, X_train, y_train, X_test, y_test, random_state=None):
    """
    Fit an ExtraTreesClassifier with ``n_trees`` estimators and log the
    fraction of test samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.ensemble import ExtraTreesClassifier

    def _as_2d(X):
        # Only dense arrays are reshaped; sparse matrices pass through.
        return X if issparse(X) else X.reshape((X.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, *shapes))
    clf = ExtraTreesClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1, random_state=random_state)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_ets{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred
项目:gcforest    作者:w821881341    | 项目源码 | 文件源码
def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Fit a RandomForestClassifier with ``n_trees`` estimators and log the
    fraction of test samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.ensemble import RandomForestClassifier

    def _as_2d(X):
        # Only dense arrays are reshaped; sparse matrices pass through.
        return X if issparse(X) else X.reshape((X.shape[0], -1))

    X_train = _as_2d(X_train)
    X_test = _as_2d(X_test)
    shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, *shapes))
    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred
项目:gcforest    作者:w821881341    | 项目源码 | 文件源码
def prec_log(X_train, y_train, X_test, y_test):
    """
    Fit a LogisticRegression (``sag`` solver) and log the fraction of test
    samples it predicts correctly.

    Dense inputs are flattened to 2-D ``(n_samples, -1)``; sparse inputs are
    used as they are. Returns the fitted classifier and its test predictions.
    """
    from sklearn.linear_model import LogisticRegression
    # Only dense arrays are reshaped. The former second, unconditional
    # reshape was a no-op for dense data and bypassed this guard for sparse
    # matrices, so it has been dropped.
    if not issparse(X_train):
        X_train = X_train.reshape((X_train.shape[0], -1))
    if not issparse(X_test):
        X_test = X_test.reshape((X_test.shape[0], -1))
    LOGGER.info('start predict: X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        X_train.shape, y_train.shape, X_test.shape, y_test.shape))
    clf = LogisticRegression(solver='sag', n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    prec = float(np.sum(y_pred == y_test)) / len(y_test)
    LOGGER.info('prec_log={:.6f}%'.format(prec*100.0))
    return clf, y_pred
项目:RandomForestClustering    作者:joshloyal    | 项目源码 | 文件源码
def fit_transform(self, X, y=None, sample_weight=None):
        """Fit the forest on a generated discriminative dataset and return
        the leaf indices of X.

        ``y`` is not used by this method. The forest is fitted on the
        output of ``generate_discriminative_dataset(X)``. With
        ``self.sparse_output`` the result of ``self.apply(X)`` is one-hot
        encoded, otherwise it is returned directly.
        """
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

        # Pre-sort indices once here to avoid that each individual tree of
        # the ensemble sorts the indices.
        if sp.issparse(X):
            X.sort_indices()

        X_, y_ = generate_discriminative_dataset(X)
        super(RandomForestEmbedding, self).fit(
            X_, y_, sample_weight=sample_weight)

        self.one_hot_encoder_ = OneHotEncoder(sparse=True)
        if not self.sparse_output:
            return self.apply(X)
        return self.one_hot_encoder_.fit_transform(self.apply(X))
项目:loompy    作者:linnarsson-lab    | 项目源码 | 文件源码
def normalize_attr_array(a: Any) -> np.ndarray:
    """
    Take all kinds of array-like inputs and normalize them to an np.ndarray.

    An exact ``np.ndarray`` is returned unchanged (whatever its shape);
    lists and tuples go through ``np.array``; a ``np.matrix`` with a single
    row or column is flattened to 1-D; sparse matrices are densified and
    normalized again. Raises ValueError for a matrix that is not a single
    row or column and for any other input type.
    """
    kind = type(a)
    if kind is np.ndarray:
        return a
    if kind is list or kind is tuple:
        return np.array(a)
    if kind is np.matrix:
        n_rows, n_cols = a.shape
        if n_rows == 1:
            return np.array(a)[0, :]
        if n_cols == 1:
            return np.array(a)[:, 0]
        raise ValueError("Attribute values must be 1-dimensional.")
    if sparse.issparse(a):
        return normalize_attr_array(a.todense())
    raise ValueError("Argument must be a list, tuple, numpy matrix, numpy ndarray or sparse matrix.")
项目:spherecluster    作者:clara-labs    | 项目源码 | 文件源码
def _check_fit_data(self, X):
        """Validate the training data and return the checked array.

        Raises ValueError when there are fewer samples than
        ``self.n_clusters`` or when any row's l2-norm differs from 1 by
        more than 1e-4.
        """
        X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
        n_samples, _ = X.shape
        if n_samples < self.n_clusters:
            raise ValueError("n_samples=%d should be >= n_clusters=%d" % (
                n_samples, self.n_clusters))

        for ee in range(n_samples):
            sample = X[ee, :]
            # The sparse norm is only looked up when X really is sparse.
            if sp.issparse(X):
                n = sp.linalg.norm(sample)
            else:
                n = np.linalg.norm(sample)

            if np.abs(n - 1.) > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(n))

        return X
项目:spherecluster    作者:clara-labs    | 项目源码 | 文件源码
def _check_test_data(self, X):
        """Validate prediction data and return the checked array.

        Raises ValueError when the number of features differs from that of
        ``self.cluster_centers_`` or when any row's l2-norm differs from 1
        by more than 1e-4.
        """
        X = check_array(X, accept_sparse='csr', dtype=FLOAT_DTYPES,
                        warn_on_dtype=True)
        n_samples, n_features = X.shape
        expected_n_features = self.cluster_centers_.shape[1]
        if not n_features == expected_n_features:
            raise ValueError(
                "Incorrect number of features. Got %d features, expected %d"
                % (n_features, expected_n_features))

        for ee in range(n_samples):
            sample = X[ee, :]
            # The sparse norm is only looked up when X really is sparse.
            if sp.issparse(X):
                n = sp.linalg.norm(sample)
            else:
                n = np.linalg.norm(sample)

            if np.abs(n - 1.) > 1e-4:
                raise ValueError("Data l2-norm must be 1, found {}".format(n))

        return X
项目:ycml    作者:skylander86    | 项目源码 | 文件源码
def keras_fit(self, X, Y, *, nn_model=None, validation_data=None, resume=None, **fit_args):
        """Fit ``nn_model`` on (X, Y), densifying any sparse inputs first.

        ``nn_model`` defaults to the attribute named by
        ``self.NN_MODEL_ATTRIBUTE``. Returns None without fitting when
        ``self._pre_fit_setup`` reports a falsy result. When no
        ``validation_data`` pair is given, ``self.validation_size`` is used
        as the validation split. Returns whatever ``nn_model.fit`` returns.
        """
        def _dense(M):
            return M.toarray() if sps.issparse(M) else M

        if nn_model is None:
            nn_model = getattr(self, self.NN_MODEL_ATTRIBUTE)
        #end if

        if not self._pre_fit_setup(nn_model, resume=resume, **fit_args):
            return
        #end if

        X, Y = _dense(X), _dense(Y)
        if validation_data is None:
            n_validation = int(self.validation_size * Y.shape[0])
            validation_split = self.validation_size
        else:
            X_validation, Y_validation = validation_data
            validation_data = (_dense(X_validation), _dense(Y_validation))
            n_validation = validation_data[1].shape[0]
            validation_split = 0.0
        #end if

        logger.info('{} instances used for training and {} instances used for validation.'.format(Y.shape[0], n_validation))

        return nn_model.fit(
            X, Y,
            validation_data=validation_data,
            validation_split=validation_split,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=self.verbose,
            callbacks=self.build_callbacks(),
            initial_epoch=self.initial_epoch,
            **fit_args)
    #end def
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _validate_X_predict(self, X, check_input):
        """Validate X before predict / apply / predict_proba and return it.

        Raises NotFittedError when ``self.tree_`` is None, and ValueError
        when a sparse X uses non-``np.intc`` indices or when the number of
        columns differs from ``self.n_features_``. X is only passed through
        ``check_array`` when ``check_input`` is true.
        """
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and not (X.indices.dtype == np.intc and
                                    X.indptr.dtype == np.intc):
                raise ValueError(
                    "No support for np.int64 index based sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ == n_features:
            return X

        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def transform(self, X):
        """Transform X separately by each transformer, concatenate results.

        Parameters
        ----------
        X : array-like or sparse matrix, shape (n_samples, n_features)
            Input data to be transformed.

        Returns
        -------
        X_t : array-like or sparse matrix, shape (n_samples, sum_n_components)
            hstack of results of transformers. sum_n_components is the
            sum of n_components (output dimension) over transformers.
            The result is CSR as soon as any transformer output is sparse.
        """
        run_all = Parallel(n_jobs=self.n_jobs)
        Xs = run_all(
            delayed(_transform_one)(trans, name, X, self.transformer_weights)
            for name, trans in self.transformer_list)
        has_sparse = any(sparse.issparse(f) for f in Xs)
        return sparse.hstack(Xs).tocsr() if has_sparse else np.hstack(Xs)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _compute_distances(self, query, candidates):
    """Computes the cosine distance.

    Distance is from the query to points in the candidates array.
    Returns argsort of distances in the candidates
    array and sorted distances.

    Parameters
    ----------
    query : passed as the first argument to ``pairwise_distances``; only
        the first row of the resulting distance matrix is used.
    candidates : array of row indices into ``self._fit_X``.

    Returns
    -------
    distance_positions : integer array, positions within ``candidates``
        ordered by increasing distance.
    distances : float array, the distances in that same order.
    """
    if candidates.shape == (0,):
        # needed since _fit_X[np.array([])] doesn't work if _fit_X sparse.
        # The builtin ``int`` is used because the ``np.int`` alias was
        # removed in NumPy 1.24 (it was the same type).
        return np.empty(0, dtype=int), np.empty(0, dtype=float)

    if sparse.issparse(self._fit_X):
        candidate_X = self._fit_X[candidates]
    else:
        candidate_X = self._fit_X.take(candidates, axis=0, mode='clip')
    # [0]: keep only the distances of the first query row
    distances = pairwise_distances(query, candidate_X,
                                   metric='cosine')[0]
    distance_positions = np.argsort(distances)
    distances = distances.take(distance_positions, mode='clip', axis=0)
    return distance_positions, distances
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_stratified_strategy_sparse_target():
    # Fit a "stratified" DummyClassifier on a sparse two-output target and
    # check that predictions stay sparse and follow the label frequencies.
    y = sp.csc_matrix(np.array([[4, 1],
                                [0, 0],
                                [1, 1],
                                [1, 4],
                                [1, 1]]))
    X_train = [[0]] * 5  # ignored

    clf = DummyClassifier(strategy="stratified", random_state=0)
    clf.fit(X_train, y)

    X_test = [[0]] * 500
    y_pred = clf.predict(X_test)
    assert_true(sp.issparse(y_pred))
    dense_pred = y_pred.toarray()

    n_test = float(len(X_test))
    # in both columns of y: label 1 -> 3/5, labels 0 and 4 -> 1/5 each
    expected_freqs = ((1, 3. / 5), (0, 1. / 5), (4, 1. / 5))
    for k in range(y.shape[1]):
        p = np.bincount(dense_pred[:, k]) / n_test
        for label, freq in expected_freqs:
            assert_almost_equal(p[label], freq, decimal=1)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_most_frequent_and_prior_strategy_sparse_target():
    # With a sparse two-output target, "most_frequent" and "prior" must
    # return a sparse prediction equal to the expected constant matrix.
    X = [[0]] * 5  # ignored
    y = sp.csc_matrix(np.array([[1, 0],
                                [1, 3],
                                [4, 0],
                                [0, 1],
                                [1, 0]]))

    # expected output: first column all ones, second column all zeros
    n_samples = len(X)
    y_expected = np.zeros((n_samples, 2))
    y_expected[:, 0] = 1

    for strategy in ("most_frequent", "prior"):
        clf = DummyClassifier(strategy=strategy, random_state=0)
        clf.fit(X, y)

        predicted = clf.predict(X)
        assert_true(sp.issparse(predicted))
        assert_array_equal(predicted.toarray(), y_expected)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_SparseRandomProjection_output_representation():
    # Check how ``dense_output`` affects the container type returned by
    # transform for dense and for sparse input.
    for projection_cls in all_SparseRandomProjection:
        sparse_data = sp.csr_matrix(data)

        # dense_output=True: the projected data is a dense numpy array,
        # whatever the input representation
        dense_rp = projection_cls(n_components=10, dense_output=True,
                                  random_state=0)
        dense_rp.fit(data)
        assert isinstance(dense_rp.transform(data), np.ndarray)
        assert isinstance(dense_rp.transform(sparse_data), np.ndarray)

        # dense_output=False: the output representation follows the input
        auto_rp = projection_cls(n_components=10, dense_output=False,
                                 random_state=0)
        auto_rp = auto_rp.fit(data)
        # dense input stays dense
        assert isinstance(auto_rp.transform(data), np.ndarray)
        # sparse input gives sparse output
        assert sp.issparse(auto_rp.transform(sparse_data))
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _randomized_logistic(X, y, weights, mask, C=1., verbose=False,
                         fit_intercept=True, tol=1e-3):
    """Fit L1-penalised logistic regressions on reweighted, masked data.

    Rows of ``X`` and ``y`` are selected with ``mask``, the columns of ``X``
    are scaled by ``1 - weights``, and one ``LogisticRegression`` with
    ``penalty='l1'`` is fitted per value of ``C``.

    Parameters
    ----------
    X : array or sparse matrix
        Data; indexed with ``safe_mask(X, mask)``.
    y : array
        Targets; indexed with ``mask``.
    weights : array
        Per-feature weights. On the sparse path its length sets the size of
        the diagonal scaling matrix, so it must equal ``X.shape[1]``.
    mask : row selector applied to ``X`` and ``y``.
    C : float or sequence of floats
        Converted to a 1-D float64 array; one model is fitted per entry.
    verbose : not used by this function.
    fit_intercept, tol : forwarded to ``LogisticRegression``.

    Returns
    -------
    scores : boolean ndarray, shape (X.shape[1], len(C))
        ``scores[j, i]`` is True when, for ``C[i]``, some row of
        ``clf.coef_`` has an absolute value above ``10 * eps`` at feature j.
    """
    X = X[safe_mask(X, mask)]
    y = y[mask]
    if issparse(X):
        # right-multiplying by a diagonal matrix scales the columns
        size = len(weights)
        weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
        X = X * weight_dia
    else:
        X *= (1 - weights)

    C = np.atleast_1d(np.asarray(C, dtype=np.float64))
    # builtin ``bool`` / ``float`` replace the ``np.bool`` / ``np.float``
    # aliases, which were removed in NumPy 1.24 (they were the same types)
    scores = np.zeros((X.shape[1], len(C)), dtype=bool)
    threshold = 10 * np.finfo(float).eps

    # each ``this_scores`` is a view on one column of ``scores``, so the
    # slice assignment below fills ``scores`` in place
    for this_C, this_scores in zip(C, scores.T):
        # XXX : would be great to do it with a warm_start ...
        clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False,
                                 fit_intercept=fit_intercept)
        clf.fit(X, y)
        this_scores[:] = np.any(np.abs(clf.coef_) > threshold, axis=0)
    return scores
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_sparsify():
    # decision_function must give the same values for every combination
    # of dense/sparse coef_ and dense/sparse input data.
    n_samples, n_features = iris.data.shape
    target = iris.target_names[iris.target]
    clf = LogisticRegression(random_state=0).fit(iris.data, target)

    # reference: dense coefficients, dense data
    reference = clf.decision_function(iris.data)

    clf.sparsify()
    assert_true(sp.issparse(clf.coef_))
    sparse_coef_dense_data = clf.decision_function(iris.data)

    sparse_iris = sp.coo_matrix(iris.data)
    sparse_coef_sparse_data = clf.decision_function(sparse_iris)

    clf.densify()
    dense_coef_sparse_data = clf.decision_function(sparse_iris)

    for other in (sparse_coef_dense_data,
                  sparse_coef_sparse_data,
                  dense_coef_sparse_data):
        assert_array_almost_equal(reference, other)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def densify(self):
    """Turn a sparse ``coef_`` back into a dense array.

    If ``coef_`` is a scipy sparse matrix it is replaced by the result of
    its ``toarray()``; otherwise the estimator is left untouched.
    ``check_is_fitted`` is called on ``coef_`` first.

    Returns
    -------
    self: estimator
    """
    check_is_fitted(
        self, "coef_",
        msg="Estimator, %(name)s, must be fitted before densifying.")
    coef = self.coef_
    if sp.issparse(coef):
        self.coef_ = coef.toarray()
    return self
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def _return_float_dtype(X, Y):
    """Pick the float dtype to use for a pair of inputs.

    1. If dtype of X and Y is float32, then dtype float32 is returned.
    2. Else dtype float is returned.

    ``X`` (and ``Y`` when it is not None) is converted with ``np.asarray``
    unless it is already an ndarray or a scipy sparse matrix. When ``Y`` is
    None the dtype of ``X`` is used for both sides of the comparison.

    Returns
    -------
    X, Y, dtype : the (possibly converted) inputs and either ``np.float32``
        or the builtin ``float``.
    """
    if not issparse(X) and not isinstance(X, np.ndarray):
        X = np.asarray(X)

    if Y is None:
        Y_dtype = X.dtype
    else:
        if not issparse(Y) and not isinstance(Y, np.ndarray):
            Y = np.asarray(Y)
        Y_dtype = Y.dtype

    if X.dtype == Y_dtype == np.float32:
        dtype = np.float32
    else:
        # builtin ``float``: the ``np.float`` alias (the same object) was
        # removed in NumPy 1.24
        dtype = float

    return X, Y, dtype
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def paired_manhattan_distances(X, Y):
    """Row-wise L1 distance between X and Y.

    The i-th output is the sum of absolute differences between row i of X
    and row i of Y, after both have gone through ``check_paired_arrays``.

    Read more in the :ref:`User Guide <metrics>`.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)

    Y : array-like, shape (n_samples, n_features)

    Returns
    -------
    distances : ndarray (n_samples, )
    """
    X, Y = check_paired_arrays(X, Y)
    delta = X - Y

    if not issparse(delta):
        return np.abs(delta).sum(axis=-1)

    # sparse difference: take absolute values of the stored entries in
    # place, then sum each row
    delta.data = np.abs(delta.data)
    row_sums = delta.sum(axis=1)
    return np.squeeze(np.array(row_sums))
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_check_sparse_arrays():
    # check_pairwise_arrays must hand back sparse matrices whose content
    # is unchanged when it is given sparse input.
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XB = rng.random_sample((5, 4))
    XA_sparse = csr_matrix(XA)
    XB_sparse = csr_matrix(XB)

    # Differences are compared to zero because testing csr matrices for
    # equality with '==' does not work as expected.
    XA_checked, XB_checked = check_pairwise_arrays(XA_sparse, XB_sparse)
    for expected, checked in ((XA_sparse, XA_checked),
                              (XB_sparse, XB_checked)):
        assert_true(issparse(checked))
        assert_equal(abs(expected - checked).sum(), 0)

    # same matrix passed on both sides
    XA_checked, XA_2_checked = check_pairwise_arrays(XA_sparse, XA_sparse)
    for expected, checked in ((XA_sparse, XA_checked),
                              (XA_2_checked, XA_checked)):
        assert_true(issparse(expected if expected is XA_2_checked
                             else checked))
        assert_equal(abs(expected - checked).sum(), 0)