Python sklearn.model_selection module: learning_curve() code examples

The following 17 code examples, extracted from open-source Python projects, illustrate how to use sklearn.model_selection.learning_curve().

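Before the project examples, here is a minimal, self-contained sketch of a typical learning_curve() call. The SVC estimator and the iris dataset are illustrative choices, not taken from any of the projects below:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC

# Illustrative data and estimator, not from the projects listed below.
X, y = load_iris(return_X_y=True)
# Score the estimator at 5 training-set sizes, each with 5-fold cross-validation.
train_sizes, train_scores, test_scores = learning_curve(
    SVC(kernel="linear"), X, y, cv=5, train_sizes=np.linspace(0.1, 1.0, 5))
print(train_sizes)         # absolute numbers of training samples used
print(train_scores.shape)  # (n_train_sizes, n_cv_folds) -> (5, 5)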
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    with warnings.catch_warnings(record=True) as w:
        train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    if len(w) > 0:
        raise RuntimeError("Unexpected warning: %r" % w[0].message)
    assert_equal(train_scores.shape, (10, 3))
    assert_equal(test_scores.shape, (10, 3))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_verbose():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        train_sizes, train_scores, test_scores = \
            learning_curve(estimator, X, y, cv=3, verbose=1)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert "[learning_curve]" in out
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_batch_and_incremental_learning_are_equal():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    train_sizes = np.linspace(0.2, 1.0, 5)
    estimator = PassiveAggressiveClassifier(n_iter=1, shuffle=False)

    train_sizes_inc, train_scores_inc, test_scores_inc = \
        learning_curve(
            estimator, X, y, train_sizes=train_sizes,
            cv=3, exploit_incremental_learning=True)
    train_sizes_batch, train_scores_batch, test_scores_batch = \
        learning_curve(
            estimator, X, y, cv=3, train_sizes=train_sizes,
            exploit_incremental_learning=False)

    assert_array_equal(train_sizes_inc, train_sizes_batch)
    assert_array_almost_equal(train_scores_inc.mean(axis=1),
                              train_scores_batch.mean(axis=1))
    assert_array_almost_equal(test_scores_inc.mean(axis=1),
                              test_scores_batch.mean(axis=1))
Project: NLP-JD    Author: ZexinYan    | Project source | File source
def plot_learning_curve(self):
        # Plot the learning curve
        plt.figure(figsize=(9, 6))
        train_sizes, train_scores, test_scores = learning_curve(
            self.model, X=self.X_train, y=self.y_train,
            cv=3, scoring='neg_mean_squared_error')
        self.plot_learning_curve_helper(train_sizes, train_scores, test_scores, 'Learning Curve')
        plt.show()
Project: time_series_modeling    Author: rheineke    | Project source | File source
def plot_learning_curve(estimators, X, y, cv=10, scoring=None, n_jobs=1):
    figsize = (6.4 * len(estimators), 4.8)
    fig, axes = plt.subplots(nrows=1, ncols=len(estimators), figsize=figsize)

    if len(estimators) == 1:
        axes = [axes]

    for ax, estimator in zip(axes, estimators):
        train_sizes, train_scores, test_scores = learning_curve(
            estimator=estimator,
            X=X,
            y=y,
            train_sizes=np.linspace(start=0.1, stop=1.0, num=10),
            cv=cv,
            scoring=scoring,  # forward the caller-supplied scoring metric
            n_jobs=n_jobs,
            verbose=1
        )
        xlabel = 'Number of training samples'
        _plot_curve(
            axes=ax,
            train_sizes=train_sizes,
            train_scores=train_scores,
            test_scores=test_scores,
            xlabel=xlabel,
            scoring=scoring
        )
        ax.set_title(pipeline_name(estimator))
    return fig
Project: DATA-SCIENCE    Author: hjj1015    | Project source | File source
def plot_learning_curve(est, x, y):
    from sklearn.model_selection import learning_curve, KFold
    training_set_size, train_scores, test_scores = learning_curve(
        est, x, y, train_sizes=np.linspace(.1, 1, 20),
        cv=KFold(20, shuffle=True, random_state=1))
    estimator_name = est.__class__.__name__
    line = plt.plot(training_set_size, train_scores.mean(axis=1), '--',
                    label="training " + estimator_name)
    plt.plot(training_set_size, test_scores.mean(axis=1), '-',
             label="test " + estimator_name, c=line[0].get_color())
    plt.xlabel('Training set size')
    plt.ylabel('Score (R^2)')
    plt.ylim(0, 1.1)
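A usage sketch for the helper above; since its y-axis is labelled R^2, it expects a regressor. Ridge and make_regression are illustrative assumptions, not part of the original project:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

# Illustrative regressor and synthetic data.
X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
plot_learning_curve(Ridge(), X, y)  # draws train/test R^2 against training set size
plt.legend(loc="best")
plt.show()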
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_unsupervised():
    X, _ = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y=None, cv=3, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_incremental_learning_not_possible():
    X, y = make_classification(n_samples=2, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    # The mockup does not have partial_fit()
    estimator = MockImprovingEstimator(1)
    assert_raises(ValueError, learning_curve, estimator, X, y,
                  exploit_incremental_learning=True)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_incremental_learning():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockIncrementalImprovingEstimator(20)
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=3, exploit_incremental_learning=True,
        train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_n_sample_range_out_of_bounds():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.0, 1.0])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0.1, 1.1])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[0, 20])
    assert_raises(ValueError, learning_curve, estimator, X, y, cv=3,
                  train_sizes=[1, 21])
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_remove_duplicate_sample_sizes():
    X, y = make_classification(n_samples=3, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(2)
    train_sizes, _, _ = assert_warns(
        RuntimeWarning, learning_curve, estimator, X, y, cv=3,
        train_sizes=np.linspace(0.33, 1.0, 3))
    assert_array_equal(train_sizes, [1, 2])
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_learning_curve_with_boolean_indices():
    X, y = make_classification(n_samples=30, n_features=1, n_informative=1,
                               n_redundant=0, n_classes=2,
                               n_clusters_per_class=1, random_state=0)
    estimator = MockImprovingEstimator(20)
    cv = KFold(n_splits=3)  # sklearn.model_selection.KFold takes n_splits
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, train_sizes=np.linspace(0.1, 1.0, 10))
    assert_array_equal(train_sizes, np.linspace(2, 20, 10))
    assert_array_almost_equal(train_scores.mean(axis=1),
                              np.linspace(1.9, 1.0, 10))
    assert_array_almost_equal(test_scores.mean(axis=1),
                              np.linspace(0.1, 1.0, 10))
Project: karura    Author: chakki-works    | Project source | File source
def _set_description(self, dfe):
        importances = pd.Series(self.model.feature_importances_, index=dfe.get_features().columns).sort_values(ascending=False)

        y = dfe.df[dfe.target]
        X = dfe.df.drop(dfe.target, axis=1)
        train_sizes, train_scores, test_scores = learning_curve(self.model, X, y, n_jobs=self.n_jobs)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        pic = ImageFile.create()
        with pic.plot() as plt_fig:
            plt, fig = plt_fig
            fig.set_figwidth(12)
            plt.subplot(121)
            importances.plot(kind="bar")

            ax2 = plt.subplot(122)
            ax2.fill_between(train_sizes, train_scores_mean - train_scores_std,
                train_scores_mean + train_scores_std, alpha=0.1, color="r")
            ax2.fill_between(train_sizes, test_scores_mean - test_scores_std,
                test_scores_mean + test_scores_std, alpha=0.1, color="g")
            ax2.plot(train_sizes, train_scores_mean, "o-", color="r",
                     label="訓練スコア" if self.lang == "ja" else "Training score")
            ax2.plot(train_sizes, test_scores_mean, 'o-', color="g",
                     label="交差検証スコア" if self.lang == "ja" else "Cross-validation score")
            ax2.set_xlabel("データ件数" if self.lang == "ja" else "data records")
            ax2.set_ylabel("精度" if self.lang == "ja" else "accuracy")
            ax2.set_ylim(0, 1)
            ax2.legend(loc="best")

        params = (self.score, self.model.__class__.__name__)
        self.description = {
            "ja": Description("???????{:.3f}??(?????:{})?????????????????????".format(*params), pic),
            "en": Description("The model accuracy is {:.3f}(model is {}). The contributions of each features are here.".format(*params), pic)
        }
Project: dmon-adp    Author: igabriel85    | Project source | File source
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Generate a simple plot of the test and training learning curve.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    title : string
        Title for the chart.

    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples) or (n_samples, n_features), optional
        Target relative to X for classification or regression;
        None for unsupervised learning.

    ylim : tuple, shape (ymin, ymax), optional
        Defines minimum and maximum y values plotted.

    cv : integer, cross-validation generator, optional
        If an integer is passed, it is the number of folds (defaults to 3).
        Specific cross-validation objects can be passed; see the
        sklearn.model_selection module for the list of possible objects.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()

    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    return plt
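A usage sketch for the helper above, mirroring the scikit-learn gallery example it follows; GaussianNB, the digits dataset, and the ShuffleSplit settings are illustrative assumptions:

from sklearn.datasets import load_digits
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB

# Illustrative estimator and dataset.
X, y = load_digits(return_X_y=True)
# 10 random 80/20 splits give smoother mean/std bands than plain k-fold CV.
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
plot_learning_curve(GaussianNB(), "Learning Curve (Naive Bayes)", X, y,
                    ylim=(0.7, 1.01), cv=cv, n_jobs=1).show()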
Project: AlphaPy    Author: ScottFreeLLC    | Project source | File source
def generate_plots(model, partition):
    r"""Generate plots while running the pipeline.

    Parameters
    ----------
    model : alphapy.Model
        The model object with plotting specifications.
    partition : alphapy.Partition
        Reference to the dataset.

    Returns
    -------
    None : None

    """

    logger.info('='*80)
    logger.info("Generating Plots for partition: %s", datasets[partition])

    # Extract model parameters

    calibration_plot = model.specs['calibration_plot']
    confusion_matrix = model.specs['confusion_matrix']
    importances = model.specs['importances']
    learning_curve = model.specs['learning_curve']
    roc_curve = model.specs['roc_curve']

    # Generate plots

    if calibration_plot:
        plot_calibration(model, partition)
    if confusion_matrix:
        plot_confusion_matrix(model, partition)
    if roc_curve:
        plot_roc_curve(model, partition)
    if partition == Partition.train:
        if learning_curve:
            plot_learning_curve(model, partition)
        if importances:
            plot_importance(model, partition)


Project: postlearn    Author: TomAugspurger    | Project source | File source
def plot_learning_curve(estimator, X, y, train_sizes=np.linspace(.1, 1.0, 5),
                        cv=None, n_jobs=1, ax=None):
    '''
    Plot the learning curve for `estimator`.

    Parameters
    ----------
    estimator : sklearn.Estimator
    X : array-like
    y : array-like
    train_sizes : array-like
        list of floats between 0 and 1
    cv : int
    n_jobs : int
    ax : matplotlib.axes
    '''
    # http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html
    if ax is None:
        fig, ax = plt.subplots()
    ax.set_xlabel("Training examples")
    ax.set_ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes
    )
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    # Draw on the supplied axes rather than the global pyplot state so that
    # several learning curves can be composed on one figure.
    ax.grid()

    ax.fill_between(train_sizes, train_scores_mean - train_scores_std,
                    train_scores_mean + train_scores_std, alpha=0.1,
                    color="r")
    ax.fill_between(train_sizes, test_scores_mean - test_scores_std,
                    test_scores_mean + test_scores_std, alpha=0.1, color="g")
    ax.plot(train_sizes, train_scores_mean, 'o-', color="r",
            label="Training score")
    ax.plot(train_sizes, test_scores_mean, 'o-', color="g",
            label="Cross-validation score")

    ax.legend(loc="best")
    return ax
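Because this variant accepts an ax, several curves can share one figure; a sketch, with LogisticRegression, DecisionTreeClassifier, and make_classification as illustrative assumptions:

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Illustrative estimators and synthetic data.
X, y = make_classification(n_samples=300, n_features=10, random_state=0)
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)
plot_learning_curve(LogisticRegression(), X, y, cv=5, ax=ax1)
plot_learning_curve(DecisionTreeClassifier(), X, y, cv=5, ax=ax2)
plt.show()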
Project: Power-Consumption-Prediction    Author: YoungGod    | Project source | File source
def plot_learning_curve(estimator, title, X, y,
                        ylim=None, cv=None, scoring=None,
                        n_jobs=1, train_sizes=np.linspace(0.1, 1.0, 5)):
    """
    Generate a simple plot of the test and training learning curve.

    Parameters
    ----------
    estimator: object type that implements the "fit" and "predict" methods.
    title: string; title for the chart.
    X: training vector, shape (n_samples, n_features)
    y: target, shape (n_samples,)
    ylim: tuple, shape (ymin, ymax)
          Defines minimum and maximum yvalues plotted.
    cv: int, cross-validation generator or an iterable
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - None, to use the default 3-fold cross-validation
          - Integer, to specify the number of folds
          - An object to be used as a cross-validation generator
    """
    from sklearn.model_selection import learning_curve
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    train_sizes, train_scores, test_scores = learning_curve(
            estimator, X, y, cv=cv, n_jobs=n_jobs, 
            train_sizes=train_sizes, scoring=scoring)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.1,
                     color='r')
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.1,
                     color='g')
    plt.plot(train_sizes, train_scores_mean, 'o-', color='r',
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color='g',
             label="Cross-validation score")
    plt.legend(loc="best")
    return plt, train_sizes
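A usage sketch exercising the scoring parameter of the variant above; Lasso and the synthetic regression data are illustrative assumptions:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso

# Illustrative estimator and synthetic data.
X, y = make_regression(n_samples=150, n_features=10, noise=5.0, random_state=0)
plot_handle, sizes = plot_learning_curve(
    Lasso(alpha=0.1), "Lasso learning curve", X, y,
    cv=5, scoring="neg_mean_squared_error")
print(sizes)  # the absolute training-set sizes that were evaluated
plot_handle.show()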