Python sklearn.utils 模块,check_array() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.utils.check_array()

项目:triage    作者:dssg    | 项目源码 | 文件源码
def transform(self, X):
        """Clip ``X`` so that every value lies inside ``self.feature_range``.

        The input is validated with ``check_array`` (1-D input is accepted
        but triggers a ``DeprecationWarning``).  A warning is emitted when
        any value falls outside the range; offending values are then
        replaced by the nearest bound.

        Parameters
        ----------
        X : array-like
            Data to clip.  Whether it is copied first is governed by
            ``self.copy``.

        Returns
        -------
        X : ndarray
            The validated array with out-of-range values clipped.
        """
        bounds = self.feature_range

        X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)

        out_of_range = np.any(X > bounds[1]) or np.any(X < bounds[0])
        if out_of_range:
            message = "You got data that are out of the range: {}".format(bounds)
            warnings.warn(message)

        # Clip the upper side first, then the lower side.
        upper = bounds[1]
        X[X > upper] = upper
        lower = bounds[0]
        X[X < lower] = lower
        return X
项目:extra-trees    作者:allrod5    | 项目源码 | 文件源码
def _validate_X_predict(
            self, X: np.ndarray, check_input: bool) -> np.ndarray:
        """Validate ``X`` before prediction and return it.

        When ``check_input`` is true the data is converted with
        ``check_array`` (CSR sparse input allowed) and sparse matrices
        whose index arrays are not ``np.intc`` are rejected.  The feature
        count is always compared with ``self.n_features_``.

        Raises
        ------
        ValueError
            If a sparse matrix uses unsupported index dtypes, or if the
            number of columns differs from ``self.n_features_``.
        """
        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X):
                bad_indices = X.indices.dtype != np.intc
                if bad_indices or X.indptr.dtype != np.intc:
                    raise ValueError(
                        "No support for np.int64 index based sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ == n_features:
            return X

        template = ("Number of features of the model must match the input."
                    " Model n_features is %s and input n_features is %s ")
        raise ValueError(template % (self.n_features_, n_features))
项目:dask-ml    作者:dask    | 项目源码 | 文件源码
def load_data():
    """Fetch the covertype dataset and return a standardized split.

    The target is binarized as ``target != 1``.  The first 522911 rows
    form the training set and the remaining rows the test set.  Only the
    first 10 feature columns are standardized, using the mean and standard
    deviation computed on the training rows; the other columns are left
    untouched.

    Returns
    -------
    X_train, X_test, y_train, y_test : ndarray
        Standardized feature matrices and the matching binary targets.
    """
    # Load dataset
    print("Loading dataset...")
    data = fetch_covtype(download_if_missing=True, shuffle=True,
                         random_state=RANDOM_STATE)
    X = check_array(data['data'], dtype=np.float32, order='C')
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin produces the same dtype.
    y = (data['target'] != 1).astype(int)

    # Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    n_train = 522911
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]

    # Standardize first 10 features (the numerical ones).  Setting the
    # mean to 0 and the std to 1 for the remaining columns turns the
    # transformation into a no-op for them.
    mean = X_train.mean(axis=0)
    std = X_train.std(axis=0)
    mean[10:] = 0.0
    std[10:] = 1.0
    X_train = (X_train - mean) / std
    X_test = (X_test - mean) / std
    return X_train, X_test, y_train, y_test
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def predict(self, X):
        """
        Assign each sample to a subcluster label via ``subcluster_centers_``.

        The row norms of ``X`` are never computed: only the dot products
        with the centers (scaled by -2) plus ``self._subcluster_norms`` are
        compared.  (Presumably ``_subcluster_norms`` holds the squared
        norms of the centers -- confirm where it is set.)

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Input data.

        Returns
        -------
        labels : ndarray, shape(n_samples)
            Labelled data.
        """
        X = check_array(X, accept_sparse='csr')
        self._check_fit(X)

        scores = safe_sparse_dot(X, self.subcluster_centers_.T)
        scores *= -2
        scores += self._subcluster_norms

        # The smallest score in each row identifies the chosen subcluster.
        closest = np.argmin(scores, axis=1)
        return self.subcluster_labels_[closest]
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def test_dict_completion_missing():
    """Check that RecsysDictFact predictions beat the bias-only baseline.

    A rank-4 toy matrix is split into train/test parts; the RMSE of the
    model prediction on the test entries must be lower than the RMSE
    obtained from ``compute_biases`` alone.
    """
    # Generate some toy data.
    rng = np.random.RandomState(0)
    left = rng.rand(100, 4)
    right = rng.rand(4, 20)
    X = sp.csr_matrix(np.dot(left, right))
    X_tr, X_te = train_test_split(X, train_size=0.95)
    X_tr = sp.csr_matrix(X_tr)
    X_te = sp.csr_matrix(X_te)

    mf = RecsysDictFact(n_components=4, n_epochs=1, alpha=1,
                        random_state=0, detrend=True, verbose=0)
    mf.fit(X_tr)
    X_pred = mf.predict(X_te)

    # Error of the fitted model on the held-out entries.
    squared_error = np.sum((X_te.data - X_pred.data) ** 2)
    rmse = sqrt(squared_error / X_te.data.shape[0])

    # Error of the baseline produced by compute_biases on a copy.
    X_te_centered = check_array(X_te, accept_sparse='csr', copy=True)
    compute_biases(X_te_centered, inplace=True)
    squared_error_c = np.sum((X_te.data - X_te_centered.data) ** 2)
    rmse_c = sqrt(squared_error_c / X_te.data.shape[0])

    assert rmse < rmse_c
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def partial_fit(self, X, sample_indices=None):
        """
        Update the factorization using rows from X, one mini-batch at a time.

        Parameters
        ----------
        X: ndarray, shape (n_samples, n_features)
            Input data
        sample_indices:
            Indices for each row of X. If None, consider that row i index is i
            (useful when providing the whole data to the function)

        Returns
        -------
        self
        """
        X = check_array(X, dtype=[np.float32, np.float64], order='C')
        n_samples, _ = X.shape

        # Feed consecutive slices of at most ``batch_size`` rows.
        for batch in gen_batches(n_samples, self.batch_size):
            self._single_batch_fit(X[batch],
                                   get_sub_slice(sample_indices, batch))
        return self
项目:mriqc    作者:poldracklab    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Select the columns of X flagged by the fitted ``mask_``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.
        y : ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        from sklearn.utils.validation import check_is_fitted
        from sklearn.utils import check_array

        check_is_fitted(self, ['mask_'], all_or_any=all)
        validated = check_array(X)
        return validated[:, self.mask_]
项目:mriqc    作者:poldracklab    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Select the columns of X flagged by the fitted ``mask_``.

        Objects exposing a ``columns`` attribute are first replaced by their
        ``.values``; the mask is applied before validation.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.
        y : ignored

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
        """
        from sklearn.utils.validation import check_is_fitted
        from sklearn.utils import check_array

        check_is_fitted(self, ['mask_'], all_or_any=all)
        if hasattr(X, 'columns'):
            X = X.values
        masked = X[:, self.mask_]
        return check_array(masked)
项目:mlens    作者:flennerhag    | 项目源码 | 文件源码
def load_data(dtype=np.float32, order='F'):
    """Load MNIST, scale pixels by 1/255 and return a 60000-row split.

    Parameters
    ----------
    dtype : dtype passed to ``check_array`` for the feature matrix.
    order : memory layout passed to ``check_array``.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The first 60000 rows and the remaining rows, with their targets.
    """
    # Load dataset
    safe_print("Loading dataset...")
    mnist = fetch_mldata('MNIST original')
    features = check_array(mnist['data'], dtype=dtype, order=order)
    targets = mnist["target"]

    # Normalize features
    features = features / 255

    # Create train-test split (as [Joachims, 2006])
    safe_print("Creating train-test split...")
    split = 60000
    return (features[:split], features[split:],
            targets[:split], targets[split:])
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def _validate_X_predict(self, X, check_input):
        """Validate X before predict-like calls and return it.

        Raises ``NotFittedError`` when ``self.tree_`` is None and
        ``ValueError`` when the column count differs from
        ``self._n_features``.  ``check_array`` is applied only when
        ``check_input`` is true.
        """
        if self.tree_ is None:
            not_fitted = ("Estimator not fitted, "
                          "call `fit` before exploiting the model.")
            raise NotFittedError(not_fitted)

        if check_input:
            X = check_array(X, dtype='f')

        n_features = X.shape[1]
        if self._n_features == n_features:
            return X

        template = ("Number of features of the model must "
                    " match the input. Model n_features is %s and "
                    " input n_features is %s ")
        raise ValueError(template % (self._n_features, n_features))
项目:pines    作者:dmitru    | 项目源码 | 文件源码
def _validate_X_predict(self, X, check_input):
        """Validate X before predict-like calls and return it.

        Raises ``NotFittedError`` when ``self._tree`` is None and
        ``ValueError`` when the column count differs from
        ``self._n_features``.  ``check_array`` is applied only when
        ``check_input`` is true.
        """
        if self._tree is None:
            not_fitted = ("Estimator not fitted, "
                          "call `fit` before exploiting the model.")
            raise NotFittedError(not_fitted)

        if check_input:
            X = check_array(X, dtype='f')

        n_features = X.shape[1]
        if self._n_features == n_features:
            return X

        template = ("Number of features of the model must "
                    " match the input. Model n_features is %s and "
                    " input n_features is %s ")
        raise ValueError(template % (self._n_features, n_features))
项目:kenchi    作者:Y-oHr-N    | 项目源码 | 文件源码
def anomaly_score(self, X=None):
        """Compute anomaly scores for test samples.

        Without ``X`` the stored ``y_score_`` is returned; otherwise the
        score is ``self._glasso.mahalanobis`` evaluated on the validated X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default None
            Test samples.

        Returns
        -------
        y_score : array-like of shape (n_samples,)
            Anomaly scores for test samples.
        """
        check_is_fitted(self, ['_glasso'])

        if X is None:
            return self.y_score_

        samples = check_array(X)
        return self._glasso.mahalanobis(samples)
项目:kenchi    作者:Y-oHr-N    | 项目源码 | 文件源码
def anomaly_score(self, X=None):
        """Compute anomaly scores for test samples.

        Without ``X`` the stored ``y_score_`` is returned; otherwise the
        score is the negated ``self._gmm.score_samples`` of the validated X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default None
            Test samples.

        Returns
        -------
        y_score : array-like of shape (n_samples,)
            Anomaly scores for test samples.
        """
        check_is_fitted(self, '_gmm')

        if X is None:
            return self.y_score_

        samples = check_array(X)
        return -self._gmm.score_samples(samples)
项目:kenchi    作者:Y-oHr-N    | 项目源码 | 文件源码
def anomaly_score(self, X=None):
        """Compute anomaly scores for test samples.

        Without ``X`` the stored ``y_score_`` is returned.  Otherwise X is
        validated, passed through ``self._normalizer`` unless
        ``self.assume_normalized`` is set, and scored as one minus its
        product with ``self.mean_direction_``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default None
            Test samples.

        Returns
        -------
        y_score : array-like of shape (n_samples,)
            Anomaly scores for test samples.
        """
        check_is_fitted(self, '_normalizer')

        if X is None:
            return self.y_score_

        samples = check_array(X)
        if not self.assume_normalized:
            samples = self._normalizer.transform(samples)

        return 1.0 - samples @ self.mean_direction_
项目:kenchi    作者:Y-oHr-N    | 项目源码 | 文件源码
def anomaly_score(self, X=None):
        """Compute anomaly scores for test samples.

        Without ``X`` the stored ``y_score_`` is returned; otherwise the
        score is the negated ``self._kde.score_samples`` of the validated X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features), default None
            Test samples.

        Returns
        -------
        y_score : array-like of shape (n_samples,)
            Anomaly scores for test samples.
        """
        check_is_fitted(self, '_kde')

        if X is None:
            return self.y_score_

        samples = check_array(X)
        return -self._kde.score_samples(samples)
项目:traffic-prediction    作者:JonnoFTW    | 项目源码 | 文件源码
def mean_absolute_percentage_error(y_true, y_pred):
    """
    Return the mean absolute percentage error, in percent.

    Use of this metric is not recommended; for illustration only.
    See other regression metrics on sklearn docs:
      http://scikit-learn.org/stable/modules/classes.html#regression-metrics

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.  Zero entries lead to a division by zero and
        therefore to ``inf``/``nan`` in the result.
    y_pred : array-like
        Predicted values, compared element by element with ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100``.

    Examples
    --------
    >>> y_true = [3, -0.5, 2, 7]; y_pred = [2.5, -0.3, 2, 8]
    >>> round(float(mean_absolute_percentage_error(y_true, y_pred)), 4)
    17.7381
    """
    # Plain lists/tuples do not support element-wise arithmetic, so convert
    # both inputs to arrays before subtracting and dividing.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # Note: does not handle mix 1d representation
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def predict(self, X):
        """Predict using the ORFF ridge model.

        Requires the fitted attributes ``coefs_`` and ``linop_``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        C : {array}, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        """
        required = ['coefs_', 'linop_']
        check_is_fitted(self, required, all_or_any=all)
        samples = check_array(X)
        return self._decision_function(samples)
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def predict(self, X):
        """Predict using ONORMA model.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        C : {array}, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        """
        required = ['coefs_', 't_', 'p_', 'X_seen_', 'y_seen_']
        check_is_fitted(self, required, all_or_any=all)
        X = check_array(X)

        linop = self.ov_kernel_(self.X_seen_)
        # Only the first t_ * p_ coefficients take part in the product.
        kernel_at_X = linop(X)
        pred = kernel_at_X * self.coefs_[:self.t_ * self.p_]

        if linop.p > 1:
            # One row per sample when the operator has several outputs.
            return pred.reshape(X.shape[0], -1)
        return pred
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def predict(self, X):
        """Predict using the OVK ridge model.

        Requires the fitted attributes ``dual_coefs_`` and ``linop_``.  The
        input must be a dense, finite, 2-D array.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        C : {array}, shape = [n_samples] or [n_samples, n_targets]
            Returns predicted values.
        """
        required = ['dual_coefs_', 'linop_']
        check_is_fitted(self, required, all_or_any=all)
        samples = check_array(X, force_all_finite=True, accept_sparse=False,
                              ensure_2d=True)
        return self._decision_function(samples)
项目:operalib    作者:operalib    | 项目源码 | 文件源码
def predict(self, X):
        """Predict conditional quantiles.

        Requires the fitted attributes ``model_`` and ``linop_``.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Samples.

        Returns
        -------
        y : {array}, shape = [n_samples, n_quantiles]
            Returns predicted values for each prescribed quantile level.
        """
        required = ['model_', 'linop_']
        check_is_fitted(self, required, all_or_any=all)
        samples = check_array(X)
        return self._decision_function(samples)
项目:chainer_sklearn    作者:corochann    | 项目源码 | 文件源码
def _check_X_y(self, X, y=None):
        """Validate ``X`` and ``y`` and return them as a pair.

        ``X`` is only validated when it is a ``numpy.ndarray``; datasets
        and lists pass through untouched, and any other type is skipped
        with a printed warning.  ``y``, when given, is validated and
        promoted to a column vector if it is 1-D.

        :param X: input data
        :param y (~numpy.ndarray): optional targets
        :return: tuple ``(X, y)``
        """
        if not (is_dataset(X) or isinstance(X, list)):
            if isinstance(X, numpy.ndarray):
                X = check_array(X, dtype=self._data_x_dtype)
            else:
                warning = '[WARNING] skip check type for dataset X with type {}'
                print(warning.format(type(X)))

        if y is None:
            return X, y

        y = check_array(y, dtype=self._data_y_dtype, ensure_2d=False)
        if y.ndim == 1:
            y = y[:, None]
        return X, y
项目:sentence-classification    作者:bgmartins    | 项目源码 | 文件源码
def predict(self, X):
        """Predict the class labels for the provided data.

        The input is copied, L1-normalized row-wise in place, converted to
        CSR and handed to ``self._pairwise_wmd``; the resulting matrix is
        passed to the parent class ``predict``.

        Parameters
        ----------
        X : scipy.sparse matrix, shape (n_test_samples, vocab_size)
            Test samples.

        Returns
        -------
        y : array of shape [n_samples]
            Class labels for each data sample.
        """
        # Work on a copy so the in-place normalization below is safe.
        X = check_array(X, accept_sparse='csr', copy=True)
        X = normalize(X, norm='l1', copy=False)

        as_csr = sp.sparse.csr_matrix(X)
        dist = self._pairwise_wmd(as_csr)
        return super(WordMoversKNN, self).predict(dist)
项目:catwalk    作者:dssg    | 项目源码 | 文件源码
def transform(self, X):
        """Clip ``X`` so that every value lies inside ``self.feature_range``.

        1-D input is accepted with a ``DeprecationWarning``.  A warning is
        issued when values fall outside the range, after which they are
        replaced by the closest bound.

        Parameters
        ----------
        X : array-like
            Data to clip; copied first when ``self.copy`` is true.

        Returns
        -------
        X : ndarray
            The validated, clipped array.
        """
        limits = self.feature_range

        X = check_array(X, copy=self.copy, ensure_2d=False, dtype=FLOAT_DTYPES)
        if X.ndim == 1:
            warnings.warn(DEPRECATION_MSG_1D, DeprecationWarning)

        if np.any(X > limits[1]) or np.any(X < limits[0]):
            text = "You got data that are out of the range: {}".format(limits)
            warnings.warn(text)

        # Upper bound first, then lower bound.
        ceiling = limits[1]
        X[X > ceiling] = ceiling
        floor = limits[0]
        X[X < floor] = floor
        return X
项目:RandomForestClustering    作者:joshloyal    | 项目源码 | 文件源码
def predict(self, X):
        """Return, for each row of X, the index of the closest cluster center.

        Distances are computed with ``self.distance_func`` between X and
        ``self.cluster_centers_`` (the medoids).
        """
        check_is_fitted(self, "cluster_centers_")

        # Validate the input and convert it to a NumPy array if possible.
        samples = check_array(X)

        # Distance of every sample to every cluster center.
        distances = self.distance_func(samples, Y=self.cluster_centers_)

        # The column with the smallest distance is the assigned cluster.
        return np.argmin(distances, axis=1)
项目:RandomForestClustering    作者:joshloyal    | 项目源码 | 文件源码
def fit_transform(self, X, y=None, sample_weight=None):
        """Fit the embedding on X and return the transformed X.

        A discriminative dataset is generated from X and used to fit the
        parent estimator.  The result of ``self.apply(X)`` is returned,
        one-hot encoded when ``self.sparse_output`` is true.

        Parameters
        ----------
        X : array-like or CSC sparse matrix
            Input data.
        y : ignored
        sample_weight : optional
            Forwarded to the parent ``fit``.
        """
        X = check_array(X, accept_sparse=['csc'], ensure_2d=False)

        # Pre-sort indices once so that the individual trees of the
        # ensemble do not each have to sort them.
        if sp.issparse(X):
            X.sort_indices()

        X_, y_ = generate_discriminative_dataset(X)
        super(RandomForestEmbedding, self).fit(
            X_, y_, sample_weight=sample_weight)

        self.one_hot_encoder_ = OneHotEncoder(sparse=True)
        if not self.sparse_output:
            return self.apply(X)
        return self.one_hot_encoder_.fit_transform(self.apply(X))
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Encode each column of X as a cosine and a sine component.

        Column values are mapped to ``2 * pi * x / (maximum + 1)`` using the
        matching entry of ``self.maximums_``.  The output holds all cosine
        columns first, followed by all sine columns.

        Raises
        ------
        ValueError
            If X does not have ``len(self.maximums_)`` columns.
        """
        # scikit-learn checks
        X = check_array(X)

        if X.shape[1] != len(self.maximums_):
            template = ("X has different shape than during fitting. "
                        "Expected %d, got %d.")
            raise ValueError(template % (len(self.maximums_), X.shape[1]))

        def _project(trig):
            # Apply the trigonometric function column by column.
            return np.array([
                trig(2 * np.pi * x / (maximum + 1))
                for x, maximum in zip(X.T, self.maximums_)
            ])

        stacked = np.vstack((_project(np.cos), _project(np.sin)))
        return stacked.T
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Binarize X based on the fitted cut points.

        Every column is digitized with its own entry of ``self.cut_points``;
        columns whose cut-point list is empty become all zeros.

        Raises
        ------
        NotFittedError
            If ``self.cut_points`` is None.
        ValueError
            If X does not have ``len(self.cut_points)`` columns.
        """
        # scikit-learn checks
        X = check_array(X)

        if self.cut_points is None:
            raise NotFittedError('Estimator not fitted, call `fit` before exploiting the model.')

        if X.shape[1] != len(self.cut_points):
            mismatch = ("Provided array's dimensions do not match with the ones from the "
                        "array `fit` was called on.")
            raise ValueError(mismatch)

        def _bin_column(index, column):
            # Columns without cut points all fall into bin 0.
            if len(self.cut_points[index]) > 0:
                return np.digitize(column, self.cut_points[index])
            return np.zeros(column.shape)

        per_column = [_bin_column(i, x) for i, x in enumerate(X.T)]
        return np.array(per_column).T
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def load_data(dtype=np.float32, order='F'):
    """Load MNIST, scale pixels by 1/255 and return a 60000-row split.

    Parameters
    ----------
    dtype : dtype passed to ``check_array`` for the feature matrix.
    order : memory layout passed to ``check_array``.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The first 60000 rows and the remaining rows, with their targets.
    """
    # Load dataset
    print("Loading dataset...")
    mnist = fetch_mldata('MNIST original')
    pixels = check_array(mnist['data'], dtype=dtype, order=order)
    labels = mnist["target"]

    # Normalize features
    pixels = pixels / 255

    # Create train-test split (as [Joachims, 2006])
    print("Creating train-test split...")
    boundary = 60000
    return (pixels[:boundary], pixels[boundary:],
            labels[:boundary], labels[boundary:])
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_check_input_false():
    """Exercise ``ElasticNet.fit`` with and without input checking.

    The fit must succeed on Fortran-ordered float64 data when checks are
    bypassed, succeed on float32 data when checks are enabled, and raise
    ``ValueError`` when checks are bypassed on float32 or C-ordered data.
    """
    X, y, _, _ = build_dataset(n_samples=20, n_features=10)
    X = check_array(X, order='F', dtype='float64')
    # NOTE(review): ``y`` is built from ``X`` here, not from the ``y``
    # returned by build_dataset -- looks unintended; confirm before changing.
    y = check_array(X, order='F', dtype='float64')
    clf = ElasticNet(selection='cyclic', tol=1e-8)
    # Check that no error is raised if data is provided in the right format
    clf.fit(X, y, check_input=False)
    # With checks enabled, float32 input is accepted.
    X = check_array(X, order='F', dtype='float32')
    clf.fit(X, y, check_input=True)
    # Check that an error is raised if data is provided in the wrong dtype,
    # because of check bypassing
    assert_raises(ValueError, clf.fit, X, y, check_input=False)

    # With no input checking, providing X in C order should result in false
    # computation
    X = check_array(X, order='C', dtype='float64')
    assert_raises(ValueError, clf.fit, X, y, check_input=False)
项目:pca    作者:vighneshbirodkar    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Apply dimensionality reduction on X.

        X is centered with ``self.center_`` (when it is not None) and then
        projected on the principal components previously extracted from a
        training set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.
        y : ignored

        Returns
        -------
        X_transformed : array-like, shape (n_samples, n_components)
        """
        check_is_fitted(self, 'center_')

        X = check_array(X)
        centered = X if self.center_ is None else X - self.center_
        return fast_dot(centered, self.components_.T)
项目:tf_base    作者:ozansener    | 项目源码 | 文件源码
def _check_array(self, X):
        """Validate X and make sure it has enough samples for the medoids.

        Parameters
        ----------
        X : array-like
            Input data, validated with ``check_array``.

        Returns
        -------
        X : ndarray
            The validated array.

        Raises
        ------
        ValueError
            If ``self.n_clusters`` exceeds the number of rows of X.
        """
        X = check_array(X)

        # Check that the number of clusters is less than or equal to
        # the number of samples.  The message used to say the medoid count
        # "must be larger", which is the opposite of the condition tested.
        n_samples = X.shape[0]
        if self.n_clusters > n_samples:
            raise ValueError(
                "The number of medoids ({}) must not be larger than the "
                "number of samples ({})".format(self.n_clusters, n_samples))

        return X
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_coef(self, X):
        """Compute coefficients for X from the stored QR decomposition.

        Parameters
        ----------
        X : array-like
            Must have as many rows as ``self.qr``.  A Fortran-ordered copy
            is made, so the caller's data is not modified.

        Returns
        -------
        coef : ndarray
            The array filled in place by the Fortran routine ``dqrcf``.
            When the rank ``k`` is smaller than the number of columns of
            ``qr``, its rows are re-indexed through ``self.pivot``.

        Raises
        ------
        ValueError
            If X and ``qr`` have a different number of rows.
        """
        qr, qraux = self.qr, self.qraux
        n, p = qr.shape

        # sanity check
        assert isinstance(qr, np.ndarray), 'internal error: QR should be a np.ndarray but got %s' % type(qr)
        assert isinstance(qraux, np.ndarray), 'internal error: qraux should be a np.ndarray but got %s' % type(qraux)

        # validate input array
        X = check_array(X, dtype='numeric', copy=True, order='F')
        nx, ny = X.shape
        if nx != n:
            raise ValueError('qr and X must have same number of rows')

        # check on size
        _validate_matrix_size(n, p)

        # get the rank of the decomposition
        k = self.rank

        # Output buffer for the Fortran routine.  The former ``info`` array
        # was never read and relied on the removed ``np.int`` alias, so it
        # is no longer allocated.
        coef = np.zeros((k, ny), dtype=np.double, order='F')

        # call the fortran module IN PLACE
        _safecall(dqrsl.dqrcf, qr, n, k, qraux, X, ny, coef, 0)

        # post-processing: re-index through the pivot when rank deficient
        return coef if not k < p else coef[self.pivot[np.arange(k)], :]
项目:char-rbm    作者:colinmorris    | 项目源码 | 文件源码
def partial_fit(self, X, y=None):
        """Fit the model to the data X which should contain a partial
        segment of the data.

        Model attributes that do not exist yet (``components_``,
        ``intercept_hidden_``, ``intercept_visible_``, ``h_samples_``) are
        initialized on the first call and reused afterwards.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.
        y : ignored

        Returns
        -------
        self : BernoulliRBM
            The fitted model.
        """
        # ``np.float`` was an alias of the builtin float (float64) and has
        # been removed from NumPy; name the dtype explicitly.
        X = check_array(X, accept_sparse='csr', dtype=np.float64)
        n_features = X.shape[1]

        if not hasattr(self, 'components_'):
            # Small random initial weights, stored in Fortran order.
            # 'F' is the standard spelling; 'fortran' is rejected by
            # current NumPy releases.
            self.components_ = np.asarray(
                self.rng_.normal(0, 0.01, (self.n_components, n_features)),
                order='F')
        if not hasattr(self, 'intercept_hidden_'):
            self.intercept_hidden_ = np.zeros(self.n_components, )
        if not hasattr(self, 'intercept_visible_'):
            self.intercept_visible_ = np.zeros(n_features, )
        if not hasattr(self, 'h_samples_'):
            self.h_samples_ = np.zeros((self.batch_size, self.n_components))

        self._fit(X)
        # Return the estimator, as the documented contract states.
        return self
项目:skboost    作者:hbldh    | 项目源码 | 文件源码
def predict(self, X, check_input=True):
        """Predict class or regression value for X.

        The fitted stump stored in ``self.tree_`` compares one column of X
        (``best_dim``) with ``threshold``; the comparison direction depends
        on the sign of ``direction``.  Predictions are +1 / -1.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        check_input : bool
            Not used by this implementation.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predict values.
        """
        # Only run the validation when X is not already a 2-D DTYPE array.
        if getattr(X, "dtype", None) != DTYPE or X.ndim != 2:
            X = check_array(X, dtype=DTYPE)

        _, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first")

        if self.n_features_ != n_features:
            template = ("Number of features of the model must "
                        " match the input. Model n_features is %s and "
                        " input n_features is %s ")
            raise ValueError(template % (self.n_features_, n_features))

        stump = self.tree_
        positive_direction = stump.get('direction') > 0
        column = X[:, stump.get('best_dim')]
        threshold = stump.get('threshold')
        if positive_direction:
            votes = column > threshold
        else:
            votes = column <= threshold
        # Map True/False to +1/-1.
        return (votes * 2) - 1
项目:skboost    作者:hbldh    | 项目源码 | 文件源码
def predict(self, X, check_input=True):
        """Predict class or regression value for X.

        The prediction is ``coefficient * (X[:, best_dim] > threshold)
        + constant`` using the values stored in ``self.tree_``.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.
        check_input : bool
            Not used by this implementation.

        Returns
        -------
        y : array of shape = [n_samples] or [n_samples, n_outputs]
            The predicted classes, or the predict values.
        """
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X):
            wrong_indices = X.indices.dtype != np.intc
            if wrong_indices or X.indptr.dtype != np.intc:
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        _, n_features = X.shape

        if self.tree_ is None:
            raise Exception("Tree not initialized. Perform a fit first")

        if self.n_features_ != n_features:
            template = ("Number of features of the model must "
                        " match the input. Model n_features is %s and "
                        " input n_features is %s ")
            raise ValueError(template % (self.n_features_, n_features))

        stump = self.tree_
        scale = stump.get('coefficient')
        above = X[:, stump.get('best_dim')] > stump.get('threshold')
        return scale * above + stump.get('constant')
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def fit(self, X, y=None):
        """Validate X and return the estimator unchanged.

        Nothing is learned; the method exists to implement the usual API
        so that the estimator works in pipelines.
        """
        # Called for validation only; the result is not needed.
        check_array(X, accept_sparse='csr')
        return self
项目:AutoML-Challenge    作者:postech-mlg-exbrain    | 项目源码 | 文件源码
def transform(self, X, y=None, copy=None):
        """Scale each non zero row of X to unit norm

        Parameters
        ----------
        X : array or scipy.sparse matrix with shape [n_samples, n_features]
            The data to normalize, row by row. scipy.sparse matrices should be
            in CSR format to avoid an un-necessary copy.
        y : ignored
        copy : bool or None
            Overrides ``self.copy`` when not None.
        """
        if copy is None:
            copy = self.copy
        validated = check_array(X, accept_sparse='csr')
        return normalize(validated, norm=self.norm, axis=1, copy=copy)
项目:FreeDiscovery    作者:FreeDiscovery    | 项目源码 | 文件源码
def transform_lsi(self, X):
        """ LSI transform, normalized by the inverse of the eigen values

        X is projected on ``components_`` and each resulting column is
        divided by the matching entry of the first ``n_components``
        ``singular_values_``.
        """
        X = check_array(X, accept_sparse='csr')
        projected = safe_sparse_dot(X, self.components_.T)
        inverse_scale = np.diag(
            1./self.singular_values_[:self.n_components])
        return projected.dot(inverse_scale)
项目:skggm    作者:skggm    | 项目源码 | 文件源码
def fit(self, X, y=None, **fit_params):
        """Fits the inverse covariance model according to the given training
        data and parameters.

        After validation, ``self.init_coefs(X)`` is called and the ``quic``
        solver is run on ``self.sample_covariance_``; its outputs are
        stored on the estimator.

        Parameters
        -----------
        X : 2D ndarray
            Input data with at least two features.
        y : ignored
        **fit_params : ignored

        Returns
        -------
        self

        Raises
        ------
        NotImplementedError
            If ``self.method`` is not ``'quic'``.
        """
        X = check_array(X, ensure_min_features=2, estimator=self)
        X = as_float_array(X, copy=False, force_all_finite=False)
        self.init_coefs(X)

        if self.method != 'quic':
            raise NotImplementedError(
                "Only method='quic' has been implemented.")

        solution = quic(
            self.sample_covariance_,
            self.lam * self.lam_scale_,
            mode=self.mode,
            tol=self.tol,
            max_iter=self.max_iter,
            Theta0=self.Theta0,
            Sigma0=self.Sigma0,
            path=self.path_,
            msg=self.verbose
        )
        (self.precision_, self.covariance_, self.opt_, self.cputime_,
         self.iters_, self.duality_gap_) = solution

        self.is_fitted = True
        return self
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def predict(self, X):
        """ Predict values of X from internal dictionary and intercepts

        Only the stored (non-zero) positions of X are predicted.  When
        ``self.detrend`` is set, row and column means are added back, and
        when ``self.crop`` is set the values are clipped to its bounds.

        Parameters
        ----------
        X: csr-matrix (n_samples, n_features)
            Matrix holding the loci of prediction

        Returns
        -------
        X_pred: csr-matrix (n_samples, n_features)
            Matrix with the same sparsity structure as X, with predicted values
        """
        if not sp.issparse(X):
            X = sp.csr_matrix(X)
        X = check_array(X, accept_sparse='csr')

        indices, indptr = X.indices, X.indptr
        out = np.zeros_like(X.data)
        _predict(out, indices, indptr, self.code_, self.components_)

        if self.detrend:
            # Each row owns the slice indptr[row]:indptr[row + 1] of ``out``.
            for row, (start, stop) in enumerate(zip(indptr[:-1], indptr[1:])):
                out[start:stop] += self.row_mean_[row]
            out += self.col_mean_.take(indices, mode='clip')

        if self.crop is not None:
            # Upper bound first, then lower bound.
            out[out > self.crop[1]] = self.crop[1]
            out[out < self.crop[0]] = self.crop[0]

        return sp.csr_matrix((out, indices, indptr), shape=X.shape)
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def score(self, X):
        """Score prediction based on root mean squared error

        Dense input is converted to CSR before being validated and
        compared with ``self.predict(X)``.
        """
        sparse_X = X if sp.issparse(X) else sp.csr_matrix(X)
        sparse_X = check_array(sparse_X, accept_sparse='csr')
        return rmse(sparse_X, self.predict(sparse_X))
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def rmse(X_true, X_pred):
    """Root mean squared error for two sparse matrices

    Only the ``data`` arrays of the two validated matrices are compared,
    element by element.
    """
    truth = check_array(X_true, accept_sparse='csr')
    estimate = check_array(X_pred, accept_sparse='csr')
    residual = truth.data - estimate.data
    return np.sqrt(np.mean(residual ** 2))
项目:modl    作者:arthurmensch    | 项目源码 | 文件源码
def fit(self, X):
        """
        Compute the factorisation X ~ code_ x components_, solving for
        D, code_ = argmin_{r2 ||D^j ||_1 + (1 - r2) || D^j ||_2^2 < 1}
        1 / 2 || X - D A ||_2 + (1 - r) || A ||_2 / 2 + r || A ||_1

        The estimator is prepared with ``self.dict_init`` (or X itself when
        no initial dictionary is given), then ``partial_fit`` is run for
        ``self.n_epochs`` passes, reshuffling the rows after each pass.

        Parameters
        ----------
        X:  ndarray, shape= (n_samples, n_features)

        Returns
        -------
        self
        """
        X = check_array(X, order='C', dtype=[np.float32, np.float64])

        if self.dict_init is not None:
            # Match the dtype chosen for X.
            dict_init = check_array(self.dict_init, dtype=X.dtype.type)
        else:
            dict_init = X
        self.prepare(n_samples=X.shape[0], X=dict_init)

        # Main loop
        for _ in range(self.n_epochs):
            self.partial_fit(X)
            X = X[self.shuffle()]
        return self
项目:NetPower_TestBed    作者:Vignesh2208    | 项目源码 | 文件源码
def score_samples(self, X, lengths=None):
        """Return the log likelihood of ``X`` together with the posteriors.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, ), optional
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        Returns
        -------
        logprob : float
            Log likelihood of ``X``, accumulated over all sequences.

        posteriors : array, shape (n_samples, n_components)
            State-membership probabilities for each sample in ``X``.

        See Also
        --------
        score : Compute the log probability under the model.
        decode : Find most likely state sequence corresponding to ``X``.
        """
        check_is_fitted(self, "startprob_")
        self._check()

        X = check_array(X)
        total_logprob = 0
        posteriors = np.zeros((X.shape[0], self.n_components))
        # Each (start, stop) pair delimits one sequence inside X.
        for start, stop in iter_from_X_lengths(X, lengths):
            frame_ll = self._compute_log_likelihood(X[start:stop])
            seq_logprob, forward = self._do_forward_pass(frame_ll)
            total_logprob += seq_logprob

            backward = self._do_backward_pass(frame_ll)
            posteriors[start:stop] = self._compute_posteriors(
                forward, backward)
        return total_logprob, posteriors
项目:NetPower_TestBed    作者:Vignesh2208    | 项目源码 | 文件源码
def score(self, X, lengths=None):
        """Return the log likelihood of ``X`` under the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, ), optional
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        Returns
        -------
        logprob : float
            Log likelihood of ``X``, summed over all sequences.

        See Also
        --------
        score_samples : Compute the log probability under the model and
            posteriors.
        decode : Find most likely state sequence corresponding to ``X``.
        """
        check_is_fitted(self, "startprob_")
        self._check()

        X = check_array(X)
        # XXX we can unroll forward pass for speed and memory efficiency.
        # Only the first element of the forward-pass result (the sequence
        # log probability) is needed; the lattice is discarded.
        return sum(
            self._do_forward_pass(
                self._compute_log_likelihood(X[start:stop]))[0]
            for start, stop in iter_from_X_lengths(X, lengths))
项目:icing    作者:slipguru    | 项目源码 | 文件源码
def fit(self, X, **kwargs):
        """Run affinity propagation clustering on ``X``.

        Non-sparse input is delegated unchanged to the parent class ``fit``.
        Sparse input is converted to COO format and clustered with
        ``sparse_ap``; the affinity matrix is either ``X`` itself
        (``affinity == "precomputed"``) or the negated squared euclidean
        distances (``affinity == "euclidean"``).

        Parameters
        ----------
        X: array-like or sparse matrix,
                shape (n_samples, n_features) or (n_samples, n_samples)
            Data matrix or, if affinity is ``precomputed``, matrix of
            similarities / affinities.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is sparse and ``self.affinity`` is neither
            ``"precomputed"`` nor ``"euclidean"``.
        """
        if not issparse(X):
            return super(AffinityPropagation, self).fit(X, **kwargs)

        # X is sparse here; check_array converts it to COO when needed.
        X = check_array(X, accept_sparse='coo')
        if self.affinity == "euclidean":
            neg_sq_dist = -euclidean_distances(X, squared=True)
            self.affinity_matrix_ = coo_matrix(neg_sq_dist)
        elif self.affinity == "precomputed":
            self.affinity_matrix_ = X
        else:
            raise ValueError("Affinity must be 'precomputed' or "
                             "'euclidean'. Got %s instead"
                             % str(self.affinity))

        result = sparse_ap(
            self.affinity_matrix_, self.preference, max_iter=self.max_iter,
            convergence_iter=self.convergence_iter, damping=self.damping,
            copy=self.copy, verbose=self.verbose, return_n_iter=True,
            convergence_percentage=self.convergence_percentage)
        self.cluster_centers_indices_, self.labels_, self.n_iter_ = result

        if self.affinity != "precomputed":
            # NOTE(review): ``X.data`` is the array of stored values of the
            # COO matrix, so this picks stored values by position rather
            # than rows of X -- confirm this is the intended behaviour.
            self.cluster_centers_ = X.data[self.cluster_centers_indices_].copy()

        return self
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def predict(self, X, quantile=None, check_input=False):
        """
        Predict regression value for X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. They are validated here with ``check_array``
            as ``dtype=np.float32``, and a sparse matrix is converted to a
            sparse ``csr_matrix``.

        quantile : int, optional
            Value ranging from 0 to 100. By default, the mean is returned.

        check_input : boolean, (default=False)
            Forwarded to the parent class ``predict`` when ``quantile`` is
            None. ``X`` is always validated by this method before that call.
            Don't use this parameter unless you know what you do.

        Returns
        -------
        y : array of shape = [n_samples]
            If quantile is set to None, then return E(Y | X). Else return
            y such that F(Y=y | x) = quantile.
        """
        # apply method requires X to be of dtype np.float32.
        # CSR (not CSC) is requested: with check_input=False the parent
        # predict receives X exactly as produced here, and scikit-learn's
        # tree prediction only accepts sparse input in CSR format.
        # NOTE(review): the parent class is not visible from this block --
        # confirm it is a scikit-learn decision tree.
        X = check_array(X, dtype=np.float32, accept_sparse="csr")
        if quantile is None:
            return super(BaseTreeQuantileRegressor, self).predict(
                X, check_input=check_input)

        quantiles = np.zeros(X.shape[0])
        X_leaves = self.apply(X)
        # Every sample that lands in the same leaf gets the same weighted
        # percentile of the training targets stored for that leaf.
        for leaf in np.unique(X_leaves):
            quantiles[X_leaves == leaf] = weighted_percentile(
                self.y_train_[self.y_train_leaves_ == leaf], quantile)
        return quantiles
项目:scikit-garden    作者:scikit-garden    | 项目源码 | 文件源码
def predict(self, X, quantile=None):
        """
        Predict regression value for X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. They are validated here with
            ``check_array(dtype=np.float32, accept_sparse="csc")``.

        quantile : int, optional
            Value ranging from 0 to 100. By default, the mean is returned.

        Returns
        -------
        y : array of shape = [n_samples]
            If quantile is set to None, then return E(Y | X). Else return
            y such that F(Y=y | x) = quantile.
        """
        # apply method requires X to be of dtype np.float32
        X = check_array(X, dtype=np.float32, accept_sparse="csc")
        if quantile is None:
            return super(BaseForestQuantileRegressor, self).predict(X)

        sorter = np.argsort(self.y_train_)
        X_leaves = self.apply(X)
        quantiles = np.zeros((X.shape[0]))
        # The per-sample weights are rebuilt inside the loop, so no
        # (n_samples, n_train) buffer is allocated up front.
        for i, x_leaf in enumerate(X_leaves):
            # Mask out every training entry that does not share the leaf
            # the current sample falls into.
            # NOTE(review): assumes y_train_leaves_ and y_weights_ are laid
            # out as (n_estimators, n_train) -- confirm against fit.
            mask = self.y_train_leaves_ != np.expand_dims(x_leaf, 1)
            x_weights = ma.masked_array(self.y_weights_, mask)
            weights = x_weights.sum(axis=0)
            quantiles[i] = weighted_percentile(
                self.y_train_, quantile, weights, sorter)
        return quantiles
项目:extreme-learning-machines    作者:IssamLaradji    | 项目源码 | 文件源码
def _decision_scores(self, X):
        """Compute the ELM model output for ``X``.

        The hidden activations of ``X`` are multiplied by
        ``self.coef_output_``. When ``self.batch_size`` is set, this is done
        batch by batch into a preallocated ``(n_samples, n_outputs_)`` array.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The input data.

        Returns
        -------
        y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The predicted values.
        """
        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

        if self.batch_size is not None:
            n_samples = X.shape[0]
            batches = gen_batches(n_samples, self.batch_size)

            y_pred = np.zeros((n_samples, self.n_outputs_))
            for rows in batches:
                hidden = self._compute_hidden_activations(X[rows])
                y_pred[rows] = safe_sparse_dot(hidden, self.coef_output_)
            return y_pred

        # No batching requested: process the whole input in one shot.
        hidden = self._compute_hidden_activations(X)
        return safe_sparse_dot(hidden, self.coef_output_)
项目:odin    作者:imito    | 项目源码 | 文件源码
def fit(self, X, y=None):
    """Fit the model on X by feeding it to ``partial_fit`` in minibatches.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and
        n_features is the number of features. A ``Data`` instance is
        sliced with ``X[:]`` before validation.

    y: Passthrough for ``Pipeline`` compatibility.

    Returns
    -------
    self: object
        Returns the instance itself.
    """
    if isinstance(X, Data):
      X = X[:]
    X = check_array(X, copy=self.copy, dtype=[np.float64, np.float32])
    n_samples, n_features = X.shape

    # Default minibatch size is 12 * n_features when none is configured.
    batch_size = (12 * n_features if self.batch_size is None
                  else self.batch_size)

    for rows in gen_batches(n_samples, batch_size):
      # X was validated above, so per-batch input checks are skipped.
      self.partial_fit(X[rows], check_input=False)
    return self