Python sklearn.ensemble module: AdaBoostRegressor() code examples

We extracted the following 26 code examples from open-source Python projects to illustrate how to use sklearn.ensemble.AdaBoostRegressor().
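Before the project snippets, here is a minimal self-contained sketch of the basic fit/score cycle (the synthetic data and parameter values below are illustrative only, not taken from any project):

from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split

# Synthetic regression problem, purely for illustration.
X, y = make_regression(n_samples=200, n_features=10, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# By default AdaBoostRegressor boosts DecisionTreeRegressor(max_depth=3) base learners.
regr = AdaBoostRegressor(n_estimators=100, random_state=0)
regr.fit(X_train, y_train)
print(regr.score(X_test, y_test))  # R^2 on held-out data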

Project: DSI-personal-reference-kit    Author: teb311    | project source | file source
def cross_validate_best_known():
    '''
        Import and clean the tractor data, then run cross-validation on each of the three
        models we are training here: a RandomForest, a GradientBoost, and an AdaBoost backed
        by a DecisionTree. Print the scores.

        The parameters used here are the "best" we have found so far via grid search.
    '''
    tractor_data = pd.read_csv('data/train.csv')
    tractor_data = cln.clean_all(tractor_data)
    X = tractor_data
    y = tractor_data.pop('SalePrice')

    rf = RandomForestRegressor(max_features=2, min_samples_split=4, n_estimators=50, min_samples_leaf=2)
    gb = GradientBoostingRegressor(loss='quantile', learning_rate=0.0001, n_estimators=50, max_features='log2', min_samples_split=2, max_depth=1)
    ada_tree_backing = DecisionTreeRegressor(max_features='sqrt', splitter='random', min_samples_split=4, max_depth=3)
    ab = AdaBoostRegressor(ada_tree_backing, learning_rate=0.1, loss='square', n_estimators=1000)

    validate.cross_v_scores([rf, gb, ab], X, y)
    # RandomForestRegressor -- RMLSE: -0.596797712098, R2: 0.0272065373946
    # GradientBoostingRegressor -- RMLSE: -0.996134592541, R2: -2.37202164829
    # AdaBoostRegressor -- RMLSE: -0.706385708459, R2: -0.103966980393
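The validate.cross_v_scores helper above is project-specific and not part of this listing; a minimal stand-in in the same spirit, built on sklearn's cross_val_score (a sketch with hypothetical scorer choices, not the project's actual implementation), might look like:

from sklearn.model_selection import cross_val_score

def cross_v_scores(models, X, y):
    # Hypothetical stand-in for the project's validate.cross_v_scores helper:
    # print cross-validated MSLE-style and R^2 scores for each model.
    for model in models:
        msle = cross_val_score(model, X, y, scoring='neg_mean_squared_log_error', cv=5)
        r2 = cross_val_score(model, X, y, scoring='r2', cv=5)
        print('%s -- MSLE: %s, R2: %s'
              % (model.__class__.__name__, msle.mean(), r2.mean()))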
Project: strategy    Author: kanghua309    | project source | file source
def model_cross_valid(X,Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # TODO: also try random forest / boosting / LSTM / GBDT models

    for model_name in [LinearRegression,ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name.__name__, results.mean())
Project: ML-note    Author: JasonK93    | project source | file source
def test_AdaBoostRegressor(*data):
    '''
    Test AdaBoostRegressor, plotting train/test scores against the number of boosted estimators.
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    regr=ensemble.AdaBoostRegressor()
    regr.fit(X_train,y_train)
    ## graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    estimators_num=len(regr.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(regr.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(regr.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()
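The ML-note test functions here and below all take the four-way data split as positional arguments; a driver sketch (hypothetical; the original project ships its own loader) using sklearn's diabetes dataset:

from sklearn import datasets
from sklearn.model_selection import train_test_split

def load_data_regression():
    # Hypothetical loader; the diabetes dataset stands in for whatever
    # data the ML-note project actually uses.
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target,
                            test_size=0.25, random_state=0)

X_train, X_test, y_train, y_test = load_data_regression()
test_AdaBoostRegressor(X_train, X_test, y_train, y_test)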
Project: ML-note    Author: JasonK93    | project source | file source
def test_AdaBoostRegressor_learning_rate(*data):
    '''
    Test AdaBoostRegressor performance across a range of learning rates.
    :param data:   train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    learning_rates=np.linspace(0.01,1)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    training_scores=[]
    testing_scores=[]
    for learning_rate in learning_rates:
        regr=ensemble.AdaBoostRegressor(learning_rate=learning_rate,n_estimators=500)
        regr.fit(X_train,y_train)
        training_scores.append(regr.score(X_train,y_train))
        testing_scores.append(regr.score(X_test,y_test))
    ax.plot(learning_rates,training_scores,label="Training score")
    ax.plot(learning_rates,testing_scores,label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostRegressor")
    plt.show()
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
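Note the base_estimator__max_depth keys above: GridSearchCV routes any parameter written as <component>__<name> to the corresponding nested estimator, which is what makes the base tree itself grid-searchable.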
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator

    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(X.shape[0])

    boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    boost.fit(X, y_regr)
    assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_))
Project: Supply-demand-forecasting    Author: LevinJ    | project source | file source
def setClf(self):
#         min_samples_split = 3
        self.clf = AdaBoostRegressor()
        return
Project: strategy    Author: kanghua309    | project source | file source
def model_fit_and_test(TrainX,TrainY,TestX,TestY):
    def build_model(model_name):
        model = model_name()
        return model
    #for model_name in [LinearRegression, Ridge, Lasso, ElasticNet, KNeighborsRegressor, DecisionTreeRegressor, SVR,RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor]:
    for model_name in [LinearRegression, ElasticNet]:
        model = build_model(model_name)
        model.fit(TrainX,TrainY)
        print(model_name)
        resid = model.predict(TestX) - TestY
        #print resid
        print("Residual sum of squares: %f"% np.mean(resid ** 2))
        #print model.predict(TestX)
        #print TestY
        # Explained variance score: 1 is perfect prediction
        plt.scatter(model.predict(TestX), resid);
        plt.axhline(0, color='red')
        plt.xlabel('Predicted Values')
        plt.ylabel('Residuals')
        #plt.xlim([1, 50])
        plt.show()

        print('Variance score: %.2f' % model.score(TestX, TestY))

        from statsmodels.stats.stattools import jarque_bera
        _, pvalue, _, _ = jarque_bera(resid)
        print ("Test Residuals Normal", pvalue)

        from statsmodels import regression, stats
        import statsmodels.api as sms
        import statsmodels.stats.diagnostic as smd
        # xs_with_constant = sms.add_constant(np.column_stack((X1,X2,X3,X4)))
        xs_with_constant = sms.add_constant(TestX)
        _, pvalue1, _, _ = smd.het_breuschpagan(resid, xs_with_constant)
        print("Test Heteroskedasticity", pvalue1)
        ljung_box = smd.acorr_ljungbox(resid, lags=10)

        #print "Lagrange Multiplier Statistics:", ljung_box[0]
        print "Test Autocorrelation P-values:", ljung_box[1]
        if any(ljung_box[1] < 0.05):
            print "The residuals are autocorrelated."
        else:
            print "The residuals are not autocorrelated."
Project: AirTicketPredicting    Author: junlulocky    | project source | file source
def __init__(self, isTrain):
        super(RegressionAdaBoost, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng)
Project: DSI-personal-reference-kit    Author: teb311    | project source | file source
def ada_boost_tree_grid_search():
    ada_boost_tree_grid = {
        'base_estimator__max_features': ['sqrt'],
        'base_estimator__splitter': ['best', 'random'],
        'base_estimator__min_samples_split': [2, 4],
        'base_estimator__max_depth': [1, 3],
        'n_estimators': [50, 100, 1000],
        'learning_rate': [.001, .01, .1],
        'loss': ['linear', 'square', 'exponential']
    }
    abr = AdaBoostRegressor(DecisionTreeRegressor())

    return ada_boost_tree_grid, abr
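A typical consumer wires the returned pair into GridSearchCV (a usage sketch; the cv and scoring values are illustrative assumptions, not the project's):

# Hypothetical usage of ada_boost_tree_grid_search() with GridSearchCV.
grid_params, abr = ada_boost_tree_grid_search()
search = GridSearchCV(abr, grid_params, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
search.fit(X, y)
print(search.best_params_)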
Project: stock_prediction    Author: vishwajeetv    | project source | file source
def adbPredictor(df):
    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)

    # clf = linear_model.SGDRegressor()
    clf = ensemble.AdaBoostRegressor()
    clf.fit(dataTrainX, dataTrainY)

    predicted = clf.predict(dataTestX)

    fig, ax = plotter.subplots()

    ax.set_ylabel('Predicted AdaBoost Weekly')
    ax.scatter(dataTestY, predicted)
    ax.set_xlabel('Measured')
    predicted = np.reshape(predicted, (predicted.size, 1))
    corrCoeff = pearsonr(dataTestY,predicted)
    print(corrCoeff[0])
    plotter.show()
    return predicted
Project: mlprojects-py    Author: srinathperera    | project source | file source
def __init__(self, conf, model=None):
        self.conf = conf
        self.name = "AdaBoostR"
        if model is None:
            self.model = AdaBoostRegressor(loss='square')
        else:
            self.model = model
Project: mlprojects-py    Author: srinathperera    | project source | file source
def get_models4ensamble(conf):
    models = []
    #models = [RFRModel(conf), DLModel(conf), LRModel(conf)]
    #models = [LRModel(conf)]
    # see http://scikit-learn.org/stable/modules/linear_model.html

    # command 0 was too big to run with depth set to 1, and command 1 was overfitting a bit

    if conf.command == 1:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":3, "eta":0.1, "min_child_weight":5,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}
    else:
        xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
            "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    #xgb_params = {"objective": "reg:linear", "booster":"gbtree", "max_depth":10, "eta":0.1, "min_child_weight":8,
    #    "subsample":0.5, "nthread":4, "colsample_bytree":0.5, "num_parallel_tree":1, 'gamma':0}

    models = [
                #DLModel(conf),

                #LRModel(conf, model=linear_model.BayesianRidge()),
                #LRModel(conf, model=linear_model.LassoLars(alpha=.1)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.1)),
                #LRModel(conf, model=linear_model.Ridge(alpha=.5)),
                #LRModel(conf, model=Pipeline([('poly', PolynomialFeatures(degree=3)),
                #    ('linear', LinearRegression(fit_intercept=False))])),
                XGBoostModel(conf, xgb_params, use_cv=True),
                LRModel(conf, model=linear_model.Lasso(alpha = 0.3)),
                RFRModel(conf, RandomForestRegressor(oob_score=True, n_jobs=4)),
                #LRModel(conf, model=linear_model.Lasso(alpha = 0.2)),
                ETRModel(conf, model=ExtraTreesRegressor(n_jobs=4)),
                #AdaBoostRModel(conf, model=AdaBoostRegressor(loss='square'))
              ]
    return models
    #return [XGBoostModel(conf, xgb_params, use_cv=True)]
Project: groot    Author: zhpmatrix    | project source | file source
def abr(X,y):
    X_train,X_validation,y_train,y_validation = train_test_split(X,y,random_state=0)
    abr_boost = AdaBoostRegressor(random_state=1)
    abr_boost.fit(X_train,y_train.ravel())
    print('training error:', 1.0 - abr_boost.score(X_train, y_train))
    print('validation error:', 1.0 - abr_boost.score(X_validation, y_validation))
    time_fit(abr_boost,X_train,y_train.ravel())
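time_fit is another project-specific helper; a minimal stand-in that simply times the fit call (hypothetical) could be:

import time

def time_fit(model, X, y):
    # Hypothetical stand-in for the project's time_fit helper.
    start = time.time()
    model.fit(X, y)
    print('fit took %.3f seconds' % (time.time() - start))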
Project: poormining    Author: bowenpay    | project source | file source
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        # rng = np.random.RandomState(1)
        clf = AdaBoostRegressor(DecisionTreeRegressor())
        clf.fit(X, Y)
        return clf
Project: ML-note    Author: JasonK93    | project source | file source
def test_AdaBoostRegressor_base_regr(*data):
    '''
    Test AdaBoostRegressor with different base regressors (decision tree vs. linear SVR).
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    from sklearn.svm import LinearSVR
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    regrs=[ensemble.AdaBoostRegressor(),
        ensemble.AdaBoostRegressor(base_estimator=LinearSVR(epsilon=0.01,C=100))]
    labels=["Decision Tree Regressor","Linear SVM Regressor"]
    for i ,regr in enumerate(regrs):
        ax=fig.add_subplot(2,1,i+1)
        regr.fit(X_train,y_train)
        ## graph
        estimators_num=len(regr.estimators_)
        X=range(1,estimators_num+1)
        ax.plot(list(X),list(regr.staged_score(X_train,y_train)),label="Training score")
        ax.plot(list(X),list(regr.staged_score(X_test,y_test)),label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(-1,1)
        ax.set_title("Base_Estimator:%s"%labels[i])
    plt.suptitle("AdaBoostRegressor")
    plt.show()
Project: ML-note    Author: JasonK93    | project source | file source
def test_AdaBoostRegressor_loss(*data):
    '''
    Test AdaBoostRegressor with the three supported loss functions.
    :param data:    train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    losses=['linear','square','exponential']
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    for i ,loss in enumerate(losses):
        regr=ensemble.AdaBoostRegressor(loss=loss,n_estimators=30)
        regr.fit(X_train,y_train)
        ## graph
        estimators_num=len(regr.estimators_)
        X=range(1,estimators_num+1)
        ax.plot(list(X),list(regr.staged_score(X_train,y_train)),
            label="Traing score:loss=%s"%loss)
        ax.plot(list(X),list(regr.staged_score(X_test,y_test)),
            label="Testing score:loss=%s"%loss)
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(-1,1)
    plt.suptitle("AdaBoostRegressor")
    plt.show()
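For context, the three losses differ only in how each sample's normalized error is mapped before the boosting weights are updated: 'linear' keeps the error as-is, 'square' squares it, and 'exponential' applies 1 - exp(-error), following Drucker's AdaBoost.R2.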
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_regression_toy():
    # Check regression on a toy dataset.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(X, y_regr)
    assert_array_equal(clf.predict(T), y_t_regr)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_boston():
    # Check consistency on dataset boston house prices.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(boston.data, boston.target)
    score = clf.score(boston.data, boston.target)
    assert score > 0.85
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_pickle():
    # Check picklability.
    import pickle

    # Adaboost classifier
    for alg in ['SAMME', 'SAMME.R']:
        obj = AdaBoostClassifier(algorithm=alg)
        obj.fit(iris.data, iris.target)
        score = obj.score(iris.data, iris.target)
        s = pickle.dumps(obj)

        obj2 = pickle.loads(s)
        assert_equal(type(obj2), obj.__class__)
        score2 = obj2.score(iris.data, iris.target)
        assert_equal(score, score2)

    # Adaboost regressor
    obj = AdaBoostRegressor(random_state=0)
    obj.fit(boston.data, boston.target)
    score = obj.score(boston.data, boston.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert_equal(type(obj2), obj.__class__)
    score2 = obj2.score(boston.data, boston.target)
    assert_equal(score, score2)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_sample_weight_missing():
    from sklearn.linear_model import LogisticRegression
    from sklearn.cluster import KMeans

    clf = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, clf.fit, X, y_regr)

    clf = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, clf.fit, X, y_regr)
Project: Tback    Author: ideaplat    | project source | file source
def get_classifier(self, X, Y):
        """ ????????
        :param X: ????
        :param Y: ??????
        :return: ??
        """
        # rng = np.random.RandomState(1)
        clf = AdaBoostRegressor(DecisionTreeRegressor(criterion='mse'))
        clf.fit(X, Y)
        return clf
Project: AirTicketPredicting    Author: junlulocky    | project source | file source
def parameterChoosing(self):
        depths = [5, 7, 9, 11, 12, 14, 15, 17, 19, 21, 22, 24, 26, 27,
                  31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 50]
        dts = [DecisionTreeRegressor(max_depth=depth, max_features='auto')
               for depth in depths]


        tuned_parameters = [{'base_estimator': dts,
                             'n_estimators': range(5,700),
                             'learning_rate': [1, 2, 3]
                             }
                            ]

        reg = GridSearchCV(AdaBoostRegressor(), tuned_parameters, cv=5, scoring='mean_squared_error')
        reg.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print reg.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in reg.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "MSE for test data set:\n"
        y_true, y_pred = self.y_test, reg.predict(self.X_test)
        print mean_squared_error(y_true, y_pred)
Project: AirTicketPredicting    Author: junlulocky    | project source | file source
def __init__(self, isTrain):
        super(RegressionUniformBlending, self).__init__(isTrain)
        # data preprocessing
        #self.dataPreprocessing()

        self.net1 = NeuralNet(
                        layers=[  # three layers: one hidden layer
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            #('hidden3', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 13),  # input dimension is 13
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=8,  # number of units in hidden layer
                        #hidden3_num_units=4,  # number of units in hidden layer
                        output_nonlinearity=None,  # linear (identity) output for regression
                        output_num_units=1,  # output dimension is 1

                        # objective function
                        objective_loss_function = lasagne.objectives.squared_error,

                        # optimization method:
                        update=lasagne.updates.nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.4,

                        # use 20% as validation
                        train_split=TrainSplit(eval_size=0.2),

                        regression=True,  # flag to indicate we're dealing with a regression problem
                        max_epochs=100,  # we want to train this many epochs
                        verbose=0,
                        )

        # Create linear regression object
        self.linRegr = linear_model.LinearRegression()

        # Create KNN regression object
        self.knn = neighbors.KNeighborsRegressor(86, weights='distance')

        # Create Decision Tree regression object
        self.decisionTree = DecisionTreeRegressor(max_depth=7, max_features=None)

        # Create AdaBoost regression object
        decisionReg = DecisionTreeRegressor(max_depth=10)
        rng = np.random.RandomState(1)
        self.adaReg = AdaBoostRegressor(decisionReg,
                          n_estimators=400,
                          random_state=rng)

        # Create Random Forest regression object
        self.model = RandomForestRegressor(max_features='sqrt', n_estimators=32, max_depth=39)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_staged_predict():
    # Check staged predictions.
    rng = np.random.RandomState(0)
    iris_weights = rng.randint(10, size=iris.target.shape)
    boston_weights = rng.randint(10, size=boston.target.shape)

    # AdaBoost classification
    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg, n_estimators=10)
        clf.fit(iris.data, iris.target, sample_weight=iris_weights)

        predictions = clf.predict(iris.data)
        staged_predictions = [p for p in clf.staged_predict(iris.data)]
        proba = clf.predict_proba(iris.data)
        staged_probas = [p for p in clf.staged_predict_proba(iris.data)]
        score = clf.score(iris.data, iris.target, sample_weight=iris_weights)
        staged_scores = [
            s for s in clf.staged_score(
                iris.data, iris.target, sample_weight=iris_weights)]

        assert_equal(len(staged_predictions), 10)
        assert_array_almost_equal(predictions, staged_predictions[-1])
        assert_equal(len(staged_probas), 10)
        assert_array_almost_equal(proba, staged_probas[-1])
        assert_equal(len(staged_scores), 10)
        assert_array_almost_equal(score, staged_scores[-1])

    # AdaBoost regression
    clf = AdaBoostRegressor(n_estimators=10, random_state=0)
    clf.fit(boston.data, boston.target, sample_weight=boston_weights)

    predictions = clf.predict(boston.data)
    staged_predictions = [p for p in clf.staged_predict(boston.data)]
    score = clf.score(boston.data, boston.target, sample_weight=boston_weights)
    staged_scores = [
        s for s in clf.staged_score(
            boston.data, boston.target, sample_weight=boston_weights)]

    assert_equal(len(staged_predictions), 10)
    assert_array_almost_equal(predictions, staged_predictions[-1])
    assert_equal(len(staged_scores), 10)
    assert_array_almost_equal(score, staged_scores[-1])
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_sparse_regression():
    # Check regression with sparse input.

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set."""

        def fit(self, X, y, sample_weight=None):
            """Modification on fit caries data type for later verification."""
            super(CustomSVR, self).fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self

    X, y = datasets.make_regression(n_samples=15, n_features=50, n_targets=1,
                                    random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix,
                          dok_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)

        # Trained on sparse format
        sparse_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train_sparse, y_train)

        # Trained on dense format
        dense_classifier = AdaBoostRegressor(
            base_estimator=CustomSVR(),
            random_state=1
        ).fit(X_train, y_train)

        # predict
        sparse_results = sparse_classifier.predict(X_test_sparse)
        dense_results = dense_classifier.predict(X_test)
        assert_array_equal(sparse_results, dense_results)

        # staged_predict
        sparse_results = sparse_classifier.staged_predict(X_test_sparse)
        dense_results = dense_classifier.staged_predict(X_test)
        for sparse_res, dense_res in zip(sparse_results, dense_results):
            assert_array_equal(sparse_res, dense_res)

        types = [i.data_type_ for i in sparse_classifier.estimators_]

        assert all([(t == csc_matrix or t == csr_matrix)
                   for t in types])