Python sklearn.ensemble module: GradientBoostingClassifier() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.ensemble.GradientBoostingClassifier().
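As a quick orientation before the project excerpts, here is a minimal sketch of the typical fit / predict / predict_proba workflow. The synthetic dataset and hyperparameter values are illustrative assumptions only, not taken from any of the projects below.

# Minimal usage sketch for GradientBoostingClassifier (illustrative values).
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1,
                                 max_depth=3, random_state=0)
clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))      # mean accuracy on held-out data
print(clf.predict_proba(X_test[:5]))  # per-class probabilities
print(clf.feature_importances_)       # impurity-based feature importances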

Project: johnson-county-ddj-public    Author: dssg    | Project source | File source
def get_feature_importance(self, clf, model_name):
        clfs = {'RandomForestClassifier':'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()
        else:
            return None
Project: johnson-county-ddj-public    Author: dssg    | Project source | File source
def define_model(self, model, parameters, n_cores = 0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
Project: DataMiningCompetitionFirstPrize    Author: lzddzh    | Project source | File source
def learn(x, y, test_x):
    # set sample weight


    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_gdbt_b)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_gdbt_b)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_gdbt_b)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_gdbt_b)

    clf = GradientBoostingClassifier(loss='deviance', n_estimators=variables.n_estimators_gdbt_b,
                                     learning_rate=variables.learning_rate_gdbt_b,
                                     max_depth=variables.max_depth_gdbt_b, random_state=0,
                                     min_samples_split=variables.min_samples_split_gdbt_b,
                                     min_samples_leaf=variables.min_samples_leaf_gdbt_b,
                                     subsample=variables.subsample_gdbt_b,
                                     ).fit(x, y, weight_list)
    prediction_list = clf.predict(test_x)

    return prediction_list
Project: easyML    Author: aarshayj    | Project source | File source
def __init__(
        self, data_block, predictors=[],cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=[]):

        base_classification.__init__(
            self, alg=GradientBoostingClassifier(), data_block=data_block, 
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric, 
            additional_display_metrics=additional_display_metrics
            )

        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
Project: SecuML    Author: ANSSI-FR    | Project source | File source
def createPipeline(self):
        self.pipeline = Pipeline([
            ('model', GradientBoostingClassifier(
                loss = self.conf.loss,
                learning_rate = self.conf.learning_rate,
                n_estimators = self.conf.n_estimators,
                criterion = self.conf.criterion,
                max_depth = self.conf.max_depth,
                min_samples_split = self.conf.min_samples_split,
                min_samples_leaf = self.conf.min_samples_leaf,
                min_weight_fraction_leaf = self.conf.min_weight_fraction_leaf,
                subsample = self.conf.subsample,
                max_features = self.conf.max_features,
                max_leaf_nodes = self.conf.max_leaf_nodes,
                min_impurity_decrease = self.conf.min_impurity_decrease,
                presort = self.conf.presort))])
Project: SentiCR    Author: senticr    | Project source | File source
def get_classifier(self):
        algo=self.algo

        if algo=="GBT":
            return GradientBoostingClassifier()
        elif algo=="RF":
            return  RandomForestClassifier()
        elif algo=="ADB":
            return AdaBoostClassifier()
        elif algo =="DT":
            return  DecisionTreeClassifier()
        elif algo=="NB":
            return  BernoulliNB()
        elif algo=="SGD":
            return  SGDClassifier()
        elif algo=="SVC":
            return LinearSVC()
        elif algo=="MLPC":
            return MLPClassifier(activation='logistic',  batch_size='auto',
            early_stopping=True, hidden_layer_sizes=(100,), learning_rate='adaptive',
            learning_rate_init=0.1, max_iter=5000, random_state=1,
            solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
            warm_start=False)
        return 0
Project: coremltools    Author: apple    | Project source | File source
def _train_convert_evaluate(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state = 1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names, self.output_name)

        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = scikit_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_classifier(spec, df)
        return metrics
Project: coremltools    Author: apple    | Project source | File source
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Project: yttresearch-machine-learning-algorithms-analysis    Author: gdemos01    | Project source | File source
def GradientBoostingDecisionTree_Export(action):

        # Setting our classifier to Gradient Boosting
        clf = GradientBoostingClassifier()

        dir = input('Give Data Directory: ')

        if int(action) == 1:
                print('Loading Data')
                PopularityClassifier.loadData(dir)   
                PopularityClassifier.youtubePopular(dir,clf,2)
                PopularityClassifier.twitterPopular(dir,clf,2)       
                PopularityClassifier.bothPopular(dir,clf,2)
        elif int(action) == 2:
                print('Loading Data')
                ViralityClassifier.loadData(dir)
                ViralityClassifier.youtubeViral(dir,clf,2)
                ViralityClassifier.twitterViral(dir,clf,2)
                ViralityClassifier.bothViral(dir,clf,2)
        else:
                print('Loading Data')
                ViralityAndPopularityClassifier.loadData(dir)
                ViralityAndPopularityClassifier.youtubeViralAndPopular(dir,clf,2)
                ViralityAndPopularityClassifier.twitterViralAndPopular(dir,clf,2)
                ViralityAndPopularityClassifier.bothViralAndPopular(dir,clf,2)
Project: yttresearch-machine-learning-algorithms-analysis    Author: gdemos01    | Project source | File source
def classify():

        #Predict Popularity
        gbdt = GradientBoostingClassifier()
        gbdt.fit(X,YP)
        print(valVir.shape)
        prediction = gbdt.predict(videos)
        print(prediction)

        same=0
        for i in range(0,valPop.size):
                if valPop[i]==prediction[i]:
                        same = same+1

        accuracy = same / valPop.size * 100
        print(accuracy)
Project: dut_tianchi_mobile_recommend_train    Author: ningshixian    | Project source | File source
def classify_user_item(train_data_new, test_data_new, result9):
    data = np.loadtxt(train_data_new)
    X = data[:, :-1]  # select columns 0 through end-1
    y = data[:, -1]  # select column end
    print X
    print y
    print 'start train'

    clf2 = RandomForestClassifier(n_estimators=100)
    # clf2=GradientBoostingClassifier()
    clf2.fit(X, y)
    # clf2 = LogisticRegression().fit(X, y)
    print clf2.classes_

    data1 = np.loadtxt(test_data_new)
    X_test = data1[:, :]
    print 'testing data is ok'
    result = clf2.predict_proba(X_test)
    print 'output result'
    print result

    f_result = open(result9, 'w')
    for i in range(0, len(result)):
        f_result.write(str(result[i]) + '\n')
Project: Quora-Kaggle    Author: PPshrimpGo    | Project source | File source
def GradientBoostingClassifier(X_train, y_train, X_test):
    from sklearn.ensemble import GradientBoostingClassifier
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    GBC = GradientBoostingClassifier(max_features = 'sqrt',
                                     n_estimators = 300,
                                     learning_rate = 0.02,
                                     max_depth = 8,
                                     subsample = 0.8)  # note: GradientBoostingClassifier has no n_jobs parameter
    GBC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

    y_pred_GBC = GBC.predict_proba(X_test)
    y_pred_GBC = pd.DataFrame(y_pred_GBC[:,1:2],columns=['GBC_predictions'])
    y_pred_GBC.to_csv('GBC_result_all.csv', index=False)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
Project: Quora-Kaggle    Author: PPshrimpGo    | Project source | File source
def GradientBoostingClassifier(X_train, y_train, X_test):
    from sklearn.ensemble import GradientBoostingClassifier
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier start in " + now.strftime('%Y-%m-%d %H:%M:%S'))
    GBC = GradientBoostingClassifier(max_features = 'sqrt',
                                     n_estimators = 300,
                                     learning_rate = 0.02,
                                     max_depth = 8,
                                     subsample = 0.8)
    GBC.fit(X_train, y_train)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier train done in " + now.strftime('%Y-%m-%d %H:%M:%S'))

    y_pred_GBC = GBC.predict_proba(X_test)
    y_pred_GBC = pd.DataFrame(y_pred_GBC[:,1:2],columns=['GBC_predictions'])
    y_pred_GBC.to_csv('GBC_result_1.csv', index=False)
    now = datetime.datetime.now()
    print ("GradientBoostingClassifier predict done in " + now.strftime('%Y-%m-%d %H:%M:%S'))
Project: talkbot    Author: nimnull    | Project source | File source
def on_startup(app):
    connector = aiohttp.TCPConnector(limit=5, use_dns_cache=True, loop=app.loop)
    session = aiohttp.ClientSession(connector=connector, raise_for_status=True)
    bot = TelegramBot(app['config'].token, session)
    image_model = fit_model(app['config'].sample_df)

    def config_injections(binder):
        # injection bindings
        binder.bind(Config, app['config'])
        binder.bind(TelegramBot, bot)
        binder.bind(GradientBoostingClassifier, image_model)
        binder.bind_to_constructor(AsyncIOMotorDatabase, init_database)


    try:
        inject.configure(config_injections)
    except inject.InjectorException:
        log.error("Injector already configured", exc_info=True)

    setup_logging(log)

    app.loop.create_task(bot.set_hook())
Project: ML-note    Author: JasonK93    | Project source | File source
def test_GradientBoostingClassifier_num(*data):
    '''
    test the performance with different n_estimators
    :param data:    train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    nums=np.arange(1,100,step=2)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for num in nums:
        clf=ensemble.GradientBoostingClassifier(n_estimators=num)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(nums,training_scores,label="Training Score")
    ax.plot(nums,testing_scores,label="Testing Score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
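A possible driver for the helper above; the digits dataset and the module-level imports (np, plt, ensemble) are assumptions for illustration, since the original project wires them up elsewhere.

# Hedged driver sketch: assumes test_GradientBoostingClassifier_num above is
# defined in a module that also provides the np/plt/ensemble aliases it uses.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, ensemble
from sklearn.model_selection import train_test_split

digits = datasets.load_digits()
data = train_test_split(digits.data, digits.target,
                        test_size=0.25, random_state=0)
test_GradientBoostingClassifier_num(*data)  # plots train/test score vs. n_estimators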
Project: ML-note    Author: JasonK93    | Project source | File source
def test_GradientBoostingClassifier_maxdepth(*data):
    '''
    test the performance with different max_depth
    :param data:     train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    maxdepths=np.arange(1,20)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for maxdepth in maxdepths:
        clf=ensemble.GradientBoostingClassifier(max_depth=maxdepth,max_leaf_nodes=None)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(maxdepths,training_scores,label="Training Score")
    ax.plot(maxdepths,testing_scores,label="Testing Score")
    ax.set_xlabel("max_depth")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Project: ML-note    Author: JasonK93    | Project source | File source
def test_GradientBoostingClassifier_learning(*data):
    '''
    test the performance with different learning rate
    :param data:     train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    learnings=np.linspace(0.01,1.0)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    testing_scores=[]
    training_scores=[]
    for learning in learnings:
        clf=ensemble.GradientBoostingClassifier(learning_rate=learning)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(learnings,training_scores,label="Training Score")
    ax.plot(learnings,testing_scores,label="Testing Score")
    ax.set_xlabel("learning_rate")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Project: ML-note    Author: JasonK93    | Project source | File source
def test_GradientBoostingClassifier_subsample(*data):
    '''
    test the performance with different subsample
    :param data:    train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    subsamples=np.linspace(0.01,1.0)
    testing_scores=[]
    training_scores=[]
    for subsample in subsamples:
            clf=ensemble.GradientBoostingClassifier(subsample=subsample)
            clf.fit(X_train,y_train)
            training_scores.append(clf.score(X_train,y_train))
            testing_scores.append(clf.score(X_test,y_test))
    ax.plot(subsamples,training_scores,label="Training Score")
    ax.plot(subsamples,testing_scores,label="Testing Score")
    ax.set_xlabel("subsample")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1.05)
    plt.suptitle("GradientBoostingClassifier")
    plt.show()
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_friedman_mse_in_graphviz():
    clf = DecisionTreeRegressor(criterion="friedman_mse", random_state=0)
    clf.fit(X, y)
    dot_data = StringIO()
    export_graphviz(clf, out_file=dot_data)

    clf = GradientBoostingClassifier(n_estimators=2, random_state=0)
    clf.fit(X, y)
    for estimator in clf.estimators_:
        export_graphviz(estimator[0], out_file=dot_data)

    for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()):
        assert_in("friedman_mse", finding.group())
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def check_classification_toy(presort, loss):
    # Check classification on a toy dataset.
    clf = GradientBoostingClassifier(loss=loss, n_estimators=10,
                                     random_state=1, presort=presort)

    assert_raises(ValueError, clf.predict, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(10, len(clf.estimators_))

    deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:])
    assert_true(np.any(deviance_decrease >= 0.0))

    leaves = clf.apply(X)
    assert_equal(leaves.shape, (6, 10, 1))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_probability_log():
    # Predict probabilities.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_check_inputs_predict():
    # X has wrong shape
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)
    clf.fit(X, y)

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    clf = GradientBoostingRegressor(n_estimators=100, random_state=1)
    clf.fit(X, rng.rand(len(X)))

    x = np.array([1.0, 2.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)

    x = np.array([[]])
    assert_raises(ValueError, clf.predict, x)

    x = np.array([1.0, 2.0, 3.0])[:, np.newaxis]
    assert_raises(ValueError, clf.predict, x)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_staged_functions_defensive():
    # test that staged_functions make defensive copies
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(10, 3))
    y = (4 * X[:, 0]).astype(np.int) + 1  # don't predict zeros
    for estimator in [GradientBoostingRegressor(),
                      GradientBoostingClassifier()]:
        estimator.fit(X, y)
        for func in ['predict', 'decision_function', 'predict_proba']:
            staged_func = getattr(estimator, "staged_" + func, None)
            if staged_func is None:
                # regressor has no staged_predict_proba
                continue
            with warnings.catch_warnings(record=True):
                staged_result = list(staged_func(X))
            staged_result[1][:] = 0
            assert_true(np.all(staged_result[0] != 0))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_serialization():
    # Check model serialization.
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    serialized_clf = pickle.dumps(clf, protocol=pickle.HIGHEST_PROTOCOL)
    clf = None
    clf = pickle.loads(serialized_clf)
    assert_array_equal(clf.predict(T), true_result)
    assert_equal(100, len(clf.estimators_))
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_more_verbose_output():
    # Check verbose=2 does not cause error.
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    clf = GradientBoostingClassifier(n_estimators=100, random_state=1,
                                     verbose=2)
    clf.fit(X, y)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header = verbose_output.readline().rstrip()
    # no OOB
    true_header = ' '.join(['%10s'] + ['%16s'] * 2) % (
        'Iter', 'Train Loss', 'Remaining Time')
    assert_equal(true_header, header)

    n_lines = sum(1 for l in verbose_output.readlines())
    # 100 lines for n_estimators==100
    assert_equal(100, n_lines)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_warm_start_oob():
    # Test if warm start OOB equals fit.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]:
        est = Cls(n_estimators=200, max_depth=1, subsample=0.5,
                  random_state=1)
        est.fit(X, y)

        est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5,
                     random_state=1, warm_start=True)
        est_ws.fit(X, y)
        est_ws.set_params(n_estimators=200)
        est_ws.fit(X, y)

        assert_array_almost_equal(est_ws.oob_improvement_[:100],
                                  est.oob_improvement_[:100])
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_probability_exponential():
    # Predict probabilities.
    clf = GradientBoostingClassifier(loss='exponential',
                                     n_estimators=100, random_state=1)

    assert_raises(ValueError, clf.predict_proba, T)

    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)

    # check if probabilities are in [0, 1].
    y_proba = clf.predict_proba(T)
    assert_true(np.all(y_proba >= 0.0))
    assert_true(np.all(y_proba <= 1.0))
    score = clf.decision_function(T).ravel()
    assert_array_almost_equal(y_proba[:, 1],
                              1.0 / (1.0 + np.exp(-2 * score)))

    # derive predictions from probabilities
    y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0)
    assert_array_equal(y_pred, true_result)
Project: Parallel-SGD    Author: angadgill    | Project source | File source
def test_partial_dependence_classifier():
    # Test partial dependence for classifier
    clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf.fit(X, y)

    pdp, axes = partial_dependence(clf, [0], X=X, grid_resolution=5)

    # only 4 grid points instead of 5 because only 4 unique X[:,0] vals
    assert pdp.shape == (1, 4)
    assert axes[0].shape[0] == 4

    # now with our own grid
    X_ = np.asarray(X)
    grid = np.unique(X_[:, 0])
    pdp_2, axes = partial_dependence(clf, [0], grid=grid)

    assert axes is None
    assert_array_equal(pdp, pdp_2)
Project: AutoML5    Author: djajetic    | Project source | File source
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # For regression, predict target values directly
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
Project: hyperband    Author: zygmuntz    | Project source | File source
def try_params( n_iterations, params ):

    n_estimators = int( round( n_iterations * trees_per_iteration ))
    print "n_estimators:", n_estimators
    pprint( params )

    clf = GB( n_estimators = n_estimators, verbose = 0, **params )

    return train_and_eval_sklearn_classifier( clf, data )
Project: ISM2017    Author: ybayle    | Project source | File source
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
Project: SIDR    Author: damurdock    | Project source | File source
def constructModel(corpus, classList, features, modelOutput):
    """
    Trains a Decision Tree model on the test corpus.

    Args:
        corpus: A list of lists, containing the GC content, coverage, and class number.
        classList: A list of class names.
        features: List of variables used by each contig.
        modelOutput: Location to save model as GraphViz DOT, or False to save no model.
    Returns:
        classifier: A DecisionTreeClassifier object that has been trained on the test corpus.
    """
    corpus.sort()  # just in case
    X = []
    Y = []
    for item in corpus:
        X.append(item[:-1]) # all but the last item
        Y.append(item[-1]) # only the last item
    X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
    # TODO: implement classifier testing and comparison, now only baggingClassifier is used as per paper
    #treeClassifier = tree.DecisionTreeClassifier()
    #treeClassifier = treeClassifier.fit(X_train, Y_train)
    #click.echo("Decision tree classifier built, score is %s out of 1.00" % treeClassifier.score(X_test, Y_test))
    baggingClassifier = ensemble.BaggingClassifier()
    baggingClassifier = baggingClassifier.fit(X_train, Y_train)
    click.echo("Bagging classifier built, score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
    #forestClassifier = ensemble.RandomForestClassifier(n_estimators=10)
    #forestClassifier = forestClassifier.fit(X_train, Y_train)
    #click.echo("Random forest classifier built, score is %s out of 1.00" % forestClassifier.score(X_test, Y_test))
    #adaClassifier = ensemble.AdaBoostClassifier(n_estimators=100)
    #adaClassifier = adaClassifier.fit(X_train, Y_train)
    #click.echo("AdaBoost classifier built, score is %s out of 1.00" % adaClassifier.score(X_test, Y_test))
    #gradientClassifier = ensemble.GradientBoostingClassifier(n_estimators=100)
    #gradientClassifier = gradientClassifier.fit(X_train, Y_train)
    #click.echo("Gradient tree boosting classifier built, score is %s out of 1.00" % gradientClassifier.score(X_test, Y_test))
    if modelOutput:
        with open(modelOutput, 'w') as dotfile:
            tree.export_graphviz(baggingClassifier, out_file=dotfile, feature_names=features,
                                 class_names=classList, filled=True, rounded=True, special_characters=True)
    return baggingClassifier
Project: auto_ml    Author: doordash    | Project source | File source
def score(self, estimator, X, y, advanced_scoring=False):

        X, y = utils.drop_missing_y_vals(X, y, output_column=None)

        if isinstance(estimator, GradientBoostingClassifier):
            X = X.toarray()

        predictions = estimator.predict_proba(X)


        if self.scoring_method == 'brier_score_loss':
            # At the moment, Microsoft's LightGBM returns probabilities > 1 and < 0, which can break some scoring functions. So we have to take the max of 1 and the pred, and the min of 0 and the pred.
            probas = [max(min(row[1], 1), 0) for row in predictions]
            predictions = probas

        try:
            score = self.scoring_func(y, predictions)
        except ValueError as e:
            bad_val_indices = []
            for idx, val in enumerate(y):
                if str(val) in bad_vals_as_strings:
                    bad_val_indices.append(idx)

            predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
            y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

            print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the y values. We will ignore these, and report the score on the rest of the dataset')
            try:
                score = self.scoring_func(y, predictions)
            except ValueError:
                # Sometimes, particularly for a badly fit model using either too little data, or a really bad set of hyperparameters during a grid search, we can predict probas that are > 1 or < 0. We'll cap those here, while warning the user about them, because they're unlikely to occur in a model that's properly trained with enough data and reasonable params
                predictions = self.clean_probas(predictions)
                score = self.scoring_func(y, predictions)


        if advanced_scoring:
            return (-1 * score, predictions)
        else:
            return -1 * score
Project: AutoML4    Author: djajetic    | Project source | File source
def __init__(self, info, verbose=True, debug_mode=False):
        self.label_num=info['label_num']
        self.target_num=info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        self.postprocessor = None
        #self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=True) # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False) # To calibrate proba
        if debug_mode>=2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba 
            return
        if info['task']=='regression':
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1,  max_depth=4, min_samples_split=14, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # For regression, predict target values directly
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:                
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...                          
            else:
                self.name = "GradientBoostingClassifier"
                self.model = eval(self.name + "(n_estimators=1, verbose=" + str(verbose) + ", random_state=1, warm_start = True)")
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
Project: aesthetics    Author: shubhamchaudhary    | Project source | File source
def get_classification():
    clf = svm.SVC()
    clf = ensemble.GradientBoostingClassifier()
    return clf
Project: nirdizati-runtime    Author: nirdizati    | Project source | File source
def __init__(self, nr_events, case_id_col, label_col, encoder_kwargs, cls_kwargs, cls_method="rf"):

        self.case_id_col = case_id_col
        self.label_col = label_col

        self.encoder = SequenceEncoder(nr_events=nr_events, case_id_col=case_id_col, label_col=label_col,
                                       **encoder_kwargs)

        if cls_method == "gbm":
            self.cls = GradientBoostingClassifier(**cls_kwargs)
        elif cls_method == "rf":
            self.cls = RandomForestClassifier(**cls_kwargs)
        else:
            print("Classifier method not known")
Project: JData    Author: ottsion    | Project source | File source
def GBDT_classify(train_dataSet_path, test_dataSet_path, train_one_and_two_result_as_proba_path):

    train_data = pd.read_csv(train_dataSet_path)
    train_data = train_data.as_matrix()
    X_train = train_data[:, 2:-1]  # select columns 0 through end-1
    y_train = train_data[:, -1]  # select column end

    test_data = pd.read_csv(test_dataSet_path)
    test_data = test_data.as_matrix()
    X_test = test_data[:, 2:-1]  # select columns 0 through end-1
    y_test = test_data[:, -1]  # select column end 

    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    pre_y_test = clf.predict_proba(X_test)
    print pre_y_test
    print("GBDT Metrics : {0}".format(precision_recall_fscore_support(y_test, pre_y_test)))

    print u'????.....'
    f_result = open(test_dataSet_prob_path, 'w')
    for i in range(0, len(pre_y_test)):
        if i==0:
            print str(pre_y_test[i][0])
        if i==len(pre_y_test)-1:
            print str(pre_y_test[i][0])
        f_result.write(str(pre_y_test[i][0]) + '\n')   

    return clf
Project: stock_trend_prediction    Author: r12543    | Project source | File source
def performGTBClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Gradient Tree Boosting binary Classification
    """
    clf = GradientBoostingClassifier(n_estimators=100)
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    print "GTBClass: ", accuracy
Project: stock_trend_prediction    Author: r12543    | Project source | File source
def performGTBClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Gradient Tree Boosting binary Classification
    """
    clf = GradientBoostingClassifier(n_estimators=100)
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)

    print "GTBClass: ", accuracy
Project: FLASH    Author: yuyuz    | Project source | File source
def get_data_preprocessor_balancing(params, y):
    d_balancing = params['layer_dict_list'][1]

    if params['balancing'] == str(d_balancing['None']) or params['balancing'] == 'None':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = None
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        params['sample_weight'] = None
    elif params['balancing'] == str(d_balancing['weighting']) or params['balancing'] == 'weighting':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = 'auto'
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        if len(y.shape) > 1:
            offsets = [2 ** i for i in range(y.shape[1])]
            y_ = np.sum(y * offsets, axis=1)
        else:
            y_ = y
        unique, counts = np.unique(y_, return_counts=True)
        cw = 1. / counts
        cw = cw / np.mean(cw)
        sample_weight = np.ones(y_.shape)
        for i, ue in enumerate(unique):
            mask = y_ == ue
            sample_weight[mask] *= cw[i]
        params['sample_weight'] = sample_weight

    return params
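For reference, a standalone sketch of how sample weights computed as above could be passed to GradientBoostingClassifier.fit; the imbalanced synthetic dataset is an assumption for illustration.

# Illustrative only: class-balanced sample weights fed to fit(), mirroring
# the weighting branch above.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=300, weights=[0.9, 0.1], random_state=0)
unique, counts = np.unique(y, return_counts=True)
cw = 1. / counts
cw = cw / np.mean(cw)
sample_weight = np.ones(y.shape)
for i, ue in enumerate(unique):
    sample_weight[y == ue] *= cw[i]

clf = GradientBoostingClassifier(random_state=0)
clf.fit(X, y, sample_weight=sample_weight)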
Project: nba-games    Author: ixarchakos    | Project source | File source
def model_fitting(train_set, train_labels, classifier_name, n_jobs=cpu_count()):
    """
    The fitting process with sklearn algorithms.
    :param train_set: numpy array, required
    :param train_labels: list, required
    :param classifier_name: string, required
    :param n_jobs: integer, required
    :return: object
        - Fit classifier model according to the given training data
    """
    classifier_list = {"svm_linear": SVC(probability=True, kernel='linear', C=1.0),
                       "svm_poly": SVC(probability=True, kernel='poly', C=1.0),
                       "svm_rbf": SVC(probability=True, kernel='rbf', C=1.0, gamma=0.01),
                       "linear_svc": LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.1, C=1.0, multi_class='ovr', fit_intercept=True,
                                               intercept_scaling=1, random_state=None, max_iter=3000),
                       "knn": KNeighborsClassifier(n_neighbors=100, weights='distance', leaf_size=30, n_jobs=n_jobs),
                       "random_forests": RandomForestClassifier(n_estimators=350, criterion='entropy', min_samples_split=2,
                                                                min_samples_leaf=1, max_leaf_nodes=600, n_jobs=n_jobs),
                       "logistic_regression": LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.4, fit_intercept=True, intercept_scaling=1,
                                                                 random_state=None, solver='liblinear', max_iter=1000, multi_class='ovr',
                                                                 warm_start=False, n_jobs=n_jobs),
                       "decision_trees": DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2,
                                                                min_samples_leaf=100, min_weight_fraction_leaf=0.0, max_features=None,
                                                                random_state=None, max_leaf_nodes=None, presort=False),
                       "sgd": SGDClassifier(alpha=.0001, n_iter=500, penalty="elasticnet", n_jobs=n_jobs),
                       "neural_network": Classifier(layers=[Layer("Sigmoid", units=14), Layer("Sigmoid", units=13), Layer("Sigmoid", units=12),
                                                            Layer("Sigmoid", units=10), Layer("Softmax")], learning_rate=0.01, n_iter=200,
                                                    batch_size=10, regularize='L1', n_stable=50, dropout_rate=0, verbose=True),
                       "GBC": GradientBoostingClassifier(max_depth=10, max_leaf_nodes=850, min_samples_leaf=15, learning_rate=0.1),
                       "XGB": XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
                                            max_depth=10, min_child_weight=2, missing=None, n_estimators=100, nthread=n_jobs, reg_alpha=0,
                                            objective='binary:logistic', reg_lambda=1, scale_pos_weight=1, seed=0, silent=True, subsample=1)}
    return classifier_list[classifier_name].fit(train_set, train_labels)
Project: coremltools    Author: apple    | Project source | File source
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        target = scikit_data['target'] > scikit_data['target'].mean()
        scikit_model.fit(scikit_data['data'], target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Project: coremltools    Author: apple    | Project source | File source
def test_conversion_bad_inputs(self):
        # Error on converting an untrained model
        with self.assertRaises(Exception):
            model = GradientBoostingClassifier()
            spec = skl_converter.convert(model, 'data', 'out')

        # Check the expected class during conversion.
        from sklearn.preprocessing import OneHotEncoder
        with self.assertRaises(Exception):
            model = OneHotEncoder()
            spec = skl_converter.convert(model, 'data', 'out')
Project: auto_ml    Author: ClimbsRocks    | Project source | File source
def score(self, estimator, X, y, advanced_scoring=False):

        X, y = utils.drop_missing_y_vals(X, y, output_column=None)

        if isinstance(estimator, GradientBoostingClassifier):
            X = X.toarray()

        predictions = estimator.predict_proba(X)


        if self.scoring_method == 'brier_score_loss':
            # At the moment, Microsoft's LightGBM returns probabilities > 1 and < 0, which can break some scoring functions. So we have to take the max of 1 and the pred, and the min of 0 and the pred.
            probas = [max(min(row[1], 1), 0) for row in predictions]
            predictions = probas

        try:
            score = self.scoring_func(y, predictions)
        except ValueError as e:
            bad_val_indices = []
            for idx, val in enumerate(y):
                if str(val) in bad_vals_as_strings:
                    bad_val_indices.append(idx)

            predictions = [val for idx, val in enumerate(predictions) if idx not in bad_val_indices]
            y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

            print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the y values. We will ignore these, and report the score on the rest of the dataset')
            try:
                score = self.scoring_func(y, predictions)
            except ValueError:
                # Sometimes, particularly for a badly fit model using either too little data, or a really bad set of hyperparameters during a grid search, we can predict probas that are > 1 or < 0. We'll cap those here, while warning the user about them, because they're unlikely to occur in a model that's properly trained with enough data and reasonable params
                predictions = self.clean_probas(predictions)
                score = self.scoring_func(y, predictions)


        if advanced_scoring:
            return (-1 * score, predictions)
        else:
            return -1 * score
Project: OpinionSpam    Author: Coder-Yu    | Project source | File source
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)
        pred_labels = {}

        for user in self.testDict:
            pred_labels[user] = classifier.predict([self.model.docvecs[user]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)
        return pred_labels

        # classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
        #                                         max_depth=1, random_state=0)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'GBDT:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = AdaBoostClassifier(n_estimators=100)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'AdaBoost:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = RandomForestClassifier(n_estimators=10)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Random Forest:'
        # print classification_report(self.testLabel, pred_labels)
Project: AnswerClassify    Author: kenluck2001    | Project source | File source
def makEnsemble( X, xlist, Y ):
    #naive bayes
    clf = MultinomialNB()
    clf.fit( xlist, Y )
    featureSelectModel.append (clf)

    #K nearest neighbours
    clf = KNeighborsClassifier()
    clf.fit( xlist, Y )
    featureSelectModel.append (clf)

    #Logistic regression
    clf = LogisticRegression(C=1)
    clf.fit( xlist, Y )
    featureSelectModel.append (clf)

    #random forest
    clf  = RandomForestClassifier(n_estimators = 400)
    clf.fit( X, Y )
    wholeFeatureModel.append (clf)

    #extra forest
    clf = ExtraTreesClassifier(n_estimators = 400)
    clf.fit( X, Y )
    wholeFeatureModel.append (clf)

    #decision tree
    clf = DecisionTreeClassifier(max_depth=None, min_samples_split=1, random_state=0)
    clf.fit( X, Y )
    wholeFeatureModel.append (clf)

    #gradient boosting
    params = {'n_estimators': 500, 'max_depth': 4, 'min_samples_split': 1,
                  'learning_rate': 0.01}
    clf = GradientBoostingClassifier(**params)
    clf.fit( X, Y )
    wholeFeatureModel.append (clf)
Project: Chinese_text_classifier    Author: swordLong    | Project source | File source
def gradient_boosting_classifier(train_x, train_y):
    from sklearn.ensemble import GradientBoostingClassifier
    model = GradientBoostingClassifier(n_estimators=200)
    model.fit(train_x, train_y)
    return model


# SVM Classifier
Project: sigopt_sklearn    Author: sigopt    | Project source | File source
def test_create(self):
    SigOptSearchCV(
      estimator=GradientBoostingClassifier,
      param_domains=GradientBoostingClassifier_PARAM_DOMAIN,
      client_token='client_token'
    )
Project: sigopt_sklearn    Author: sigopt    | Project source | File source
def test_no_token(self):
    with pytest.raises(ValueError):
      SigOptSearchCV(estimator=GradientBoostingClassifier, param_domains=GradientBoostingClassifier_PARAM_DOMAIN)
Project: sigopt_sklearn    Author: sigopt    | Project source | File source
def test_search(self):
    conn = sigopt.Connection()

    n_iter = 5
    folds = 3
    cv = SigOptSearchCV(
      estimator=GradientBoostingClassifier(),
      param_domains=GradientBoostingClassifier_PARAM_DOMAIN,
      client_token='client_token',
      n_iter=n_iter,
      cv=folds
    )
    assert len(conn.experiments().create.mock_calls) == 0
    assert len(conn.experiments().fetch.mock_calls) == 0
    assert len(conn.experiments().suggestions.create.mock_calls) == 0
    assert len(conn.experiments().observations.create.mock_calls) == 0

    data = sklearn.datasets.load_iris()
    cv.fit(data['data'], data['target'])
    assert len(conn.experiments().create.mock_calls) == 1
    create_definition = conn.experiments().create.call_args[1]
    assert create_definition['name'] == GradientBoostingClassifier_EXPERIMENT_DEF['name']

    assert len(create_definition['parameters']) == len(GradientBoostingClassifier_EXPERIMENT_DEF['parameters'])
    for p in GradientBoostingClassifier_EXPERIMENT_DEF['parameters']:
      assert p in create_definition['parameters']
    assert len(conn.experiments().best_assignments().fetch.mock_calls) == 1
    assert len(conn.experiments().suggestions().create.mock_calls) == n_iter * folds
    assert len(conn.experiments().observations().create.mock_calls) == n_iter * folds

    assert cv.best_params_ == zero_corner(GradientBoostingClassifier_EXPERIMENT_DEF)