Python sklearn.ensemble module, AdaBoostClassifier() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use sklearn.ensemble.AdaBoostClassifier().
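Before the project samples, here is a minimal, self-contained sketch of the basic workflow these examples build on; the synthetic dataset and parameter values are illustrative only, not taken from any project below:

from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

# illustrative synthetic binary classification problem
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# 100 boosting rounds over the default decision-stump base estimator
clf = AdaBoostClassifier(n_estimators=100, learning_rate=1.0, random_state=0)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))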

Project: johnson-county-ddj-public | Author: dssg
def get_feature_importance(self, clf, model_name):
        clfs = {'RandomForestClassifier':'feature_importances',
                'ExtraTreesClassifier': 'feature_importances',
                'AdaBoostClassifier': 'feature_importances',
                'LogisticRegression': 'coef',
                'svm.SVC': 'coef',
                'GradientBoostingClassifier': 'feature_importances',
                'GaussianNB': None,
                'DecisionTreeClassifier': 'feature_importances',
                'SGDClassifier': 'coef',
                'KNeighborsClassifier': None,
                'linear.SVC': 'coef'}

        if clfs[model_name] == 'feature_importances':
            return list(clf.feature_importances_)
        elif clfs[model_name] == 'coef':
            return clf.coef_.tolist()
        else:
            return None
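A hypothetical call to the method above; the object holding it (model_runner) and the toy data are placeholders, not part of the project:

from sklearn.ensemble import RandomForestClassifier

# toy data, purely for illustration
X_toy = [[0, 1], [1, 0], [1, 1], [0, 0]]
y_toy = [0, 1, 1, 0]
rf = RandomForestClassifier().fit(X_toy, y_toy)
# returns one importance value per training column
importances = model_runner.get_feature_importance(rf, 'RandomForestClassifier')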
Project: rltk | Author: usc-isi-i2
def get_classifier_class(class_name):
    name_table = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis
    }

    if class_name not in name_table:
        raise ValueError('No such classifier')

    return name_table[class_name]
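The table maps names to classes rather than instances, so constructor arguments stay with the caller; a short usage sketch, with the parameter value chosen only for illustration:

# look up the class, then instantiate it with whatever arguments are needed
AdaBoost = get_classifier_class('ada_boost')
clf = AdaBoost(n_estimators=100)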
Project: johnson-county-ddj-public | Author: dssg
def define_model(self, model, parameters, n_cores = 0):
        clfs = {'RandomForestClassifier': RandomForestClassifier(n_estimators=50, n_jobs=7),
                'ExtraTreesClassifier': ExtraTreesClassifier(n_estimators=10, n_jobs=7, criterion='entropy'),
                'AdaBoostClassifier': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
                'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
                'svm.SVC': svm.SVC(kernel='linear', probability=True, random_state=0),
                'GradientBoostingClassifier': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
                'GaussianNB': GaussianNB(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2", n_jobs=7),
                'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3), 
                'linear.SVC': svm.LinearSVC() }

        if model not in clfs:
            raise ConfigError("Unsupported model {}".format(model))

        clf = clfs[model]
        clf.set_params(**parameters)
        return clf
Project: AirTicketPredicting | Author: junlulocky
def __init__(self, isTrain, isOutlierRemoval):
        super(ClassificationAdaBoost, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()

        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=7,
            algorithm="SAMME.R")
        # self.dt_stump = DecisionTreeClassifier(max_depth=14)
        # self.ada = AdaBoostClassifier(
        #     base_estimator=self.dt_stump,
        #     learning_rate=1,
        #     n_estimators=50,
        #     algorithm="SAMME")
Project: easyML | Author: aarshayj
def __init__(
        self,data_block, predictors=[],cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=[]):

        base_classification.__init__(
            self, alg=AdaBoostClassifier(), data_block=data_block, 
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric, 
            additional_display_metrics=additional_display_metrics
            )

        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
Project: SentiCR | Author: senticr
def get_classifier(self):
        algo=self.algo

        if algo=="GBT":
            return GradientBoostingClassifier()
        elif algo=="RF":
            return  RandomForestClassifier()
        elif algo=="ADB":
            return AdaBoostClassifier()
        elif algo =="DT":
            return  DecisionTreeClassifier()
        elif algo=="NB":
            return  BernoulliNB()
        elif algo=="SGD":
            return  SGDClassifier()
        elif algo=="SVC":
            return LinearSVC()
        elif algo=="MLPC":
            return MLPClassifier(activation='logistic',  batch_size='auto',
            early_stopping=True, hidden_layer_sizes=(100,), learning_rate='adaptive',
            learning_rate_init=0.1, max_iter=5000, random_state=1,
            solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
            warm_start=False)
        return 0
Project: stock_trend_prediction | Author: r12543
def performAdaBoostClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    print "AdaBoost: ", accuracy
Project: stock-price-prediction | Author: chinuy
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf

    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)

    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()

    elif method == 'SVM':
        c = parameters[0]
        g =  parameters[1]
        clf = SVC(C=c, gamma=g)

    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
Project: DataMiningCompetitionFirstPrize | Author: lzddzh
def learn(x, y, test_x):
    # set sample weight
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_ada)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_ada)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_ada)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_ada)

    clf = AdaBoostClassifier(n_estimators=variables.n_estimators_ada,
                             learning_rate=variables.learning_rate_ada)
    clf.fit(x, y, sample_weight=np.asarray(weight_list))
    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)

    return prediction_list, prediction_list_prob
Project: SyConn | Author: StructuralNeurobiologyLab
def init_clf(clf_used, params=None):
    if params is not None:
        params_used = params
    elif clf_used == 'svm':
        params_used = svm_params
    elif clf_used == 'ada_boost':
        params_used = rf_params
    elif clf_used == 'lr':
        params_used = lr_params
    else:
        params_used = rf_params
    if clf_used == 'svm':
        clf = SVC(**params_used)
    elif clf_used == 'ada_boost':
        rf = RandomForestClassifier(**rf_params)
        clf = AdaBoostClassifier(base_estimator=rf, **params_used)
    elif clf_used == 'lr':
        clf = LogisticRegressionCV(**params_used)
    else:
        clf = RandomForestClassifier(**params_used)
    return clf
Project: Stock-Market-Analysis-and-Prediction | Author: samshara
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, fout, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    if savemodel == True:
        #fname_out = '{}-{}.pickle'.format(fout, datetime.now())
        fname_out = fout + '.pickle'
        with open(fname_out, 'wb') as f:
            pickle.dump(clf, f, -1)    

    accuracy = clf.score(X_test, y_test)

    return accuracy
Project: ML-note | Author: JasonK93
def test_AdaBoostClassifier(*data):
    '''
    test Ada score with different number of classifiers
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    ## graph
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()
Project: ML-note | Author: JasonK93
def test_AdaBoostClassifier_learning_rate(*data):
    '''
    test performance with different learning rate
    :param data: train_data, test_data, train_value, test_value
    :return: None
    '''
    X_train,X_test,y_train,y_test=data
    learning_rates=np.linspace(0.01,1)
    fig=plt.figure()
    ax=fig.add_subplot(1,1,1)
    training_scores=[]
    testing_scores=[]
    for learning_rate in learning_rates:
        clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,n_estimators=500)
        clf.fit(X_train,y_train)
        training_scores.append(clf.score(X_train,y_train))
        testing_scores.append(clf.score(X_test,y_test))
    ax.plot(learning_rates,training_scores,label="Training score")
    ax.plot(learning_rates,testing_scores,label="Testing score")
    ax.set_xlabel("learning rate")
    ax.set_ylabel("score")
    ax.legend(loc="best")
    ax.set_title("AdaBoostClassifier")
    plt.show()
Project: Parallel-SGD | Author: angadgill
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
Project: ISM2017 | Author: ybayle
def classify(train=None, test=None, data=None, res_dir="res/", disp=True, outfilename=None):
    """Description of compare
    compare multiple classifier and display the best one
    """
    utils.print_success("Comparison of differents classifiers")
    if data is not None:
        train_features = data["train_features"]
        train_groundtruths = data["train_groundtruths"]
        test_features = data["test_features"]
        test_groundtruths = data["test_groundtruths"]
    else:
        train = utils.abs_path_file(train)
        test = utils.abs_path_file(test)
        train_features, train_groundtruths = read_file(train)
        test_features, test_groundtruths = read_file(test)
    if not utils.create_dir(res_dir):
        res_dir = utils.abs_path_dir(res_dir)
    classifiers = {
        "RandomForest": RandomForestClassifier(n_jobs=-1)
        # "RandomForest": RandomForestClassifier(n_estimators=5),
        # "KNeighbors":KNeighborsClassifier(3),
        # "GaussianProcess":GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
        # "DecisionTree":DecisionTreeClassifier(max_depth=5),
        # "MLP":MLPClassifier(),
        # "AdaBoost":AdaBoostClassifier(),
        # "GaussianNB":GaussianNB(),
        # "QDA":QuadraticDiscriminantAnalysis(),
        # "SVM":SVC(kernel="linear", C=0.025),
        # "GradientBoosting":GradientBoostingClassifier(),
        # "ExtraTrees":ExtraTreesClassifier(),
        # "LogisticRegression":LogisticRegression(),
        # "LinearDiscriminantAnalysis":LinearDiscriminantAnalysis()
    }
    for key in classifiers:
        utils.print_success(key)
        clf = classifiers[key]
        utils.print_info("\tFit")
        clf.fit(train_features, train_groundtruths)
        utils.print_info("\tPredict")
        predictions = clf.predict(test_features)
    return predictions
Project: SIDR | Author: damurdock
def constructModel(corpus, classList, features, modelOutput):
    """
    Trains a bagging classifier on the corpus (the tree, forest, and boosting variants are left commented out below).

    Args:
        corpus: A list of lists, containing the GC content, coverage, and class number.
        classList: A list of class names.
        features: List of variables used by each contig.
        modelOutput: Location to save model as GraphViz DOT, or False to save no model.
    Returns:
        classifier: A BaggingClassifier object that has been trained on the corpus.
    """
    corpus.sort()  # just in case
    X = []
    Y = []
    for item in corpus:
        X.append(item[:-1]) # all but the last item
        Y.append(item[-1]) # only the last item
    X_train, X_test, Y_train, Y_test = mscv.train_test_split(X, Y, test_size=0.3, random_state=0)
    # TODO: implement classifier testing and comparison, now only baggingClassifier is used as per paper
    #treeClassifier = tree.DecisionTreeClassifier()
    #treeClassifier = treeClassifier.fit(X_train, Y_train)
    #click.echo("Decision tree classifier built, score is %s out of 1.00" % treeClassifier.score(X_test, Y_test))
    baggingClassifier = ensemble.BaggingClassifier()
    baggingClassifier = baggingClassifier.fit(X_train, Y_train)
    click.echo("Bagging classifier built, score is %s out of 1.00" % baggingClassifier.score(X_test, Y_test))
    #forestClassifier = ensemble.RandomForestClassifier(n_estimators=10)
    #forestClassifier = forestClassifier.fit(X_train, Y_train)
    #click.echo("Random forest classifier built, score is %s out of 1.00" % forestClassifier.score(X_test, Y_test))
    #adaClassifier = ensemble.AdaBoostClassifier(n_estimators=100)
    #adaClassifier = adaClassifier.fit(X_train, Y_train)
    #click.echo("AdaBoost classifier built, score is %s out of 1.00" % adaClassifier.score(X_test, Y_test))
    #gradientClassifier = ensemble.GradientBoostingClassifier(n_estimators=100)
    #gradientClassifier = gradientClassifier.fit(X_train, Y_train)
    #click.echo("Gradient tree boosting classifier built, score is %s out of 1.00" % gradientClassifier.score(X_test, Y_test))
    if modelOutput:
        with open(modelOutput, 'w') as dotfile:
            # export_graphviz expects a single tree, so export the first estimator of the bagging ensemble
            tree.export_graphviz(baggingClassifier.estimators_[0], out_file=dotfile, feature_names=features,
                                 class_names=classList, filled=True, rounded=True, special_characters=True)
    return baggingClassifier
Project: XTREE | Author: ai-se
def adaboost(train, test, smoteit=True):
  "ADABOOST"
  if smoteit:
    train = SMOTE(train)
  clf = AdaBoostClassifier()
  train_DF = formatData(train)
  test_DF = formatData(test)
  features = train_DF.columns[:-2]
  klass = train_DF[train_DF.columns[-2]]
  # set_trace()
  clf.fit(train_DF[features], klass)
  preds = clf.predict(test_DF[test_DF.columns[:-2]]).tolist()
  return preds
Project: dmon-adp | Author: igabriel85
def adaBoost(self, settings, data=None, dropna=True):
        df = self.__loadData(data, dropna)
        features = df.columns[:-1]
        X = df[features]
        y = df.iloc[:, -1].values
        seed = 7
        num_trees = 500
        kfold = model_selection.KFold(n_splits=10, random_state=seed)
        print(kfold)
        model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
        results = model_selection.cross_val_score(model, X, y, cv=kfold)
        model.fit(X, y)
        print(results.mean())
        print(model.score(X, y))
        return True
Project: intellead-classification | Author: intellead
def classification(lead):
    #classifiers = [
    #    ('ab', AdaBoostClassifier()),
    #    ('dt', DecisionTreeClassifier(max_depth=5)),
    #    ('kn', KNeighborsClassifier(16)),
    #]
    inputs = get_dataset_input_from_database(lead.keys())
    outputs = get_dataset_output_from_database()
    print('The total number of examples in the dataset is: %d' % (len(inputs)))
    inputs_training, inputs_test, outputs_training, outputs_test = train_test_split(inputs, outputs, test_size=0.3, random_state=42)
    print('The number of examples used for training are: %d' % (len(inputs_training)))
    print('The number of examples used for testing are: %d' % (len(inputs_test)))
    knn = KNeighborsClassifier(n_neighbors=7, p=2)
    knn.fit(inputs_training, np.ravel(outputs_training))
    print('[K=7] The probability of the algorithm to be right is: %f%%' % (knn.score(inputs_test, outputs_test) * 100))
    #voting_classifier = VotingClassifier(estimators=classifiers, voting='hard')
    #voting_classifier = voting_classifier.fit(inputs_training, np.ravel(outputs_training))
    #print('The probability of the machine to be right is: %f%%' % (voting_classifier.score(inputs_test, outputs_test) * 100))
    print('Lead data:')
    print(lead)
    data_to_predict = convert_dict_to_tuple(lead)
    print('Lead data to predict:')
    print(data_to_predict)
    lead_status = knn.predict(data_to_predict)
    lead_status_value = lead_status[0]
    #lead_status = voting_classifier.predict(data_to_predict)
    print('According to lead data, his status is: %d' % (lead_status_value))
    print('[0] unqualified [1] qualified')
    proba = knn.predict_proba(data_to_predict)
    max_proba = max(proba[0])
    print('Proba is: %d%%' %(max_proba*100))
    lead_status_dict = dict()
    dict.update(lead_status_dict, value=str(lead_status_value))
    dict.update(lead_status_dict, proba=str(max_proba))
    return lead_status_dict
Project: Audio-classification-using-Bag-of-Frames-approach | Author: amogh3892
def adaboost_predict(training_samples, training_labels, test_samples, test_labels, n_estimators=50, learning_rate=1.0):
    from sklearn.ensemble import AdaBoostClassifier

    clf = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)

    t0 = time()
    clf.fit(training_samples,training_labels)
    training_time = round(time()-t0, 3)

    t0 = time()
    pred = clf.predict(test_samples)
    test_time = round(time()-t0, 3)

    from sklearn.metrics import accuracy_score

    acc = accuracy_score(test_labels, pred)

    no_features = np.array(training_samples).shape[1]
    training_samples = np.array(training_samples).shape[0]
    test_samples = np.array(test_samples).shape[0]

    with open("Temp\\results.txt","w") as outfile:
        outfile.write("Alogirthm : {}\n".format("Adaboost"))
        outfile.write("Estimators  = {}\n".format(n_estimators))
        outfile.write("Learning rate = {}\n".format(learning_rate))
        outfile.write("No of features : {}\n".format(no_features))
        outfile.write("No of training samples : {}\n".format(training_samples))
        outfile.write("No of test samples : {}\n".format(test_samples))
        outfile.write("Training time : {}\n".format(training_time))
        outfile.write("Test time : {}\n".format(test_time))
        outfile.write("Accuracy : {}\n".format(acc))

    with open("Temp\\result_labels.csv","wb") as outfile:
        np.savetxt(outfile,pred)
Project: AirTicketPredicting | Author: junlulocky
def __init__(self, isTrain):
        super(ClassificationHmmGeneralize, self).__init__(isTrain)
        # data preprocessing
        self.dataPreprocessing()

        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=5,
            algorithm="SAMME.R")

        # load the general data
        # feature 0~7: flight number dummy variables
        # feature 8: departure date; feature 9: observed date state;
        # feature 10: minimum price; feature 11: maximum price

        # feature 12: output; feature 13: current price
        # feature 14: flight index
        self.X_general = np.load('inputGeneralClf_HmmParsed/X_train.npy')
        self.y_general = np.load('inputGeneralClf_HmmParsed/y_train.npy')
        self.y_general = self.y_general.reshape((self.y_general.shape[0], 1))
        self.y_general_price = np.load('inputGeneralClf_HmmParsed/y_train_price.npy')
        self.y_general_price = self.y_general_price.reshape((self.y_general_price.shape[0], 1))
        self.y_general_index = np.load('inputGeneralClf_HmmParsed/y_index.npy')
        self.y_general_index = self.y_general_index.reshape((self.y_general_index.shape[0], 1))



        self.routes_general = ["BGY_OTP", # route 1
                "BUD_VKO", # route 2
                "CRL_OTP", # route 3
                "CRL_WAW", # route 4
                "LTN_OTP", # route 5
                "LTN_PRG", # route 6
                "OTP_BGY", # route 7
                "OTP_CRL", # route 8
                "OTP_LTN", # route 9
                "PRG_LTN", # route 10
                "VKO_BUD", # route 11
                "WAW_CRL"] # route 12
Project: FLASH | Author: yuyuz
def get_data_preprocessor_balancing(params, y):
    d_balancing = params['layer_dict_list'][1]

    if params['balancing'] == str(d_balancing['None']) or params['balancing'] == 'None':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = None
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        params['sample_weight'] = None
    elif params['balancing'] == str(d_balancing['weighting']) or params['balancing'] == 'weighting':
        # for fp: ['ExtraTreesClassifier', 'LinearSVC'] + clf: ['DecisionTreeClassifier', 'ExtraTreesClassifier', 'LinearSVC', 'SVC', 'RandomForestClassifier', 'SGDClassifier']
        params['class_weight'] = 'auto'
        # for clf: ['AdaBoostClassifier', 'GradientBoostingClassifier']
        if len(y.shape) > 1:
            offsets = [2 ** i for i in range(y.shape[1])]
            y_ = np.sum(y * offsets, axis=1)
        else:
            y_ = y
        unique, counts = np.unique(y_, return_counts=True)
        cw = 1. / counts
        cw = cw / np.mean(cw)
        sample_weight = np.ones(y_.shape)
        for i, ue in enumerate(unique):
            mask = y_ == ue
            sample_weight[mask] *= cw[i]
        params['sample_weight'] = sample_weight

    return params
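In the 'weighting' branch above, each class receives a weight inversely proportional to its frequency, normalized so the class weights average to 1; a small worked example under those same definitions:

import numpy as np

y_ = np.array([0, 0, 0, 1])                         # three negatives, one positive
unique, counts = np.unique(y_, return_counts=True)  # counts = [3, 1]
cw = 1. / counts                                    # [0.3333..., 1.0]
cw = cw / np.mean(cw)                               # [0.5, 1.5]
# per-sample weights become [0.5, 0.5, 0.5, 1.5]:
# the minority example counts three times as much as each majority example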
Project: MLAB_Intuit | Author: rykard95
def generate_filter(X_train, y_train):

#    clf = RidgeClassifierCV(alphas=[0.01, 0.1, 1, 10]) 
    clf = RandomForestClassifier(n_jobs=4)
#    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)
    return clf
Project: Machine-Learning-Tools-on-Iris-Dataset | Author: debjitpaul
def perform_adaboost(self,X_train_std,y_train,X_test_std, y_test): ##perform adaboost

      ada = AdaBoostClassifier(n_estimators=10)
      ada.fit(X_train_std, y_train)
      train_score=cross_val_score(ada,X_train_std, y_train)
      print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))
      test_score=cross_val_score(ada,X_test_std, y_test)
      print('The test accuracy is {:.2f}%'.format(test_score.mean()*100))
      X=X_test_std
      y=y_test
      resolution=0.01
      #Z = svm.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      markers = ('s', 'x', 'o', '^', 'v')
      colors = ('red', 'blue', 'green', 'gray', 'cyan')
      cmap = ListedColormap(colors[:len(np.unique(y_test))])
      # plot the decision surface
      x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
      x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
      xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

      Z = ada.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      Z = Z.reshape(xx1.shape)
      plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
      plt.xlim(xx1.min(), xx1.max())
      plt.ylim(xx2.min(), xx2.max())

      for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
      plt.show()
Project: OpinionSpam | Author: Coder-Yu
def fitAndPredict(self):
        # classifier = LogisticRegression()
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Logistic:'
        # print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)
        pred_labels = {}

        for user in self.testDict:
            pred_labels[user] = classifier.predict([self.model.docvecs[user]])
        # print 'SVM:'
        # print classification_report(self.testLabel, pred_labels)
        return pred_labels

        # classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
        #                                         max_depth=1, random_state=0)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'GBDT:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = AdaBoostClassifier(n_estimators=100)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'AdaBoost:'
        # print classification_report(self.testLabel, pred_labels)
        #
        # clf = RandomForestClassifier(n_estimators=10)
        # classifier.fit(self.trainingSet, self.trainingLabel)
        # pred_labels = classifier.predict(self.testSet)
        # print 'Random Forest:'
        # print classification_report(self.testLabel, pred_labels)
Project: stock-price-prediction | Author: chinuy
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, savemodel):
    """
    Ada Boosting binary Classification
    """
    # n = parameters[0]
    # l =  parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)

    accuracy = clf.score(X_test, y_test)

    return accuracy
Project: extreme-learning-machines | Author: IssamLaradji
def test_sample_weight_elm():
    """Smoke test - AdaBoostClassifier should work with ELMClassifer."""
    X = Xdigits_binary[:50]
    y = ydigits_binary[:50]

    elm = ELMClassifier(n_hidden=20)
    clf = AdaBoostClassifier(n_estimators=3, base_estimator=elm)
    clf.fit(X, y)
    assert_greater(clf.score(X, y), 0.9)
Project: intelligentCampus | Author: Jackal007
def getBestOne(self, name):
        # if the classifier has already been generated, load it from disk
        try:
            from sklearn.externals import joblib
            clf = joblib.load(name + '.pkl')
            return clf
        except Exception:
            pass

        # if the classifier does not exist,
        # search for the number of boosting rounds that scores best
        bestAccuracyRate, bestClf = 0, None
        for loopTimes in range(2, 200):

            sclf = AdaBoostClassifier(base_estimator=self.clf, learning_rate=1, n_estimators=loopTimes, algorithm='SAMME')

            # hold out 10% of the data to score this candidate
            X_train, X_test, Y_train, Y_test = train_test_split(self.X, self.Y, test_size=0.1, random_state=0)
            sclf.fit(X_train, Y_train)
            accuracyRate = sclf.score(X_test, Y_test)

            if accuracyRate > bestAccuracyRate:
                bestAccuracyRate = accuracyRate
                bestClf = sclf

        # save the best classifier as a dump, then return it fitted
        joblib.dump(bestClf, name + '.pkl')

        return bestClf
Project: smart_sniffer | Author: ScarWar
def ada_boost_classifier(self, data, target, learning_rate=1, n_estimators=400, enable_ada=False):
        ada_boost = AdaBoostClassifier(
            base_estimator=self.clf,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            algorithm="SAMME.R")
        ada_boost.fit(data, target)
        if not enable_ada:
            self.clf = ada_boost
        print "AdaBoost training finished"
Project: smart_sniffer | Author: ScarWar
def ada_boost_classifier_err(self, data, target, learning_rate=1, n_estimators=400, show_score=False):
        ada_boost = AdaBoostClassifier(
            base_estimator=self.clf,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            algorithm="SAMME.R")
        ada_boost.fit(data, target)
        score = ada_boost.score(data, target)
        if show_score:
            print("Fitness score: " + str(score))
        return 1.0 - score
Project: machine-learning | Author: cinserra
def various_classifiers():
    # List of tuples of a classifier and its parameters.
    clf_list = []

    clf_linearsvm = LinearSVC()
    params_linearsvm = {"C": [0.5, 1, 5, 10, 100, 10**10],"tol":[0.1, 0.0000000001],"class_weight":['balanced']}
    clf_list.append( (clf_linearsvm, params_linearsvm) )

    clf_tree = DecisionTreeClassifier()
    params_tree = { "min_samples_split":[2, 5, 10, 20],"criterion": ('gini', 'entropy')}
    clf_list.append( (clf_tree, params_tree) )

    clf_random_tree = RandomForestClassifier()
    params_random_tree = {  "n_estimators":[2, 3, 5],"criterion": ('gini', 'entropy')}
    clf_list.append( (clf_random_tree, params_random_tree) )

    clf_adaboost = AdaBoostClassifier()
    params_adaboost = { "n_estimators":[20, 30, 50, 100]}
    clf_list.append( (clf_adaboost, params_adaboost) )

    clf_knn = KNeighborsClassifier()
    params_knn = {"n_neighbors":[2, 5], "p":[2,3]}
    clf_list.append( (clf_knn, params_knn) )

    clf_log = LogisticRegression()
    params_log = {"C":[0.5, 1, 10, 10**2,10**10, 10**20],"tol":[0.1, 0.00001, 0.0000000001],"class_weight":['balanced']}
    clf_list.append( (clf_log, params_log) )

    clf_lda = LinearDiscriminantAnalysis()
    params_lda = {"n_components":[0, 1, 2, 5, 10]}
    clf_list.append( (clf_lda, params_lda) )

    logistic = LogisticRegression()
    rbm = BernoulliRBM()
    clf_rbm = Pipeline(steps=[('rbm', rbm), ('logistic', logistic)])
    params_rbm = {"logistic__tol":[0.0000000001, 10**-20],"logistic__C":[0.05, 1, 10, 10**2,10**10, 10**20],"logistic__class_weight":['balanced'],"rbm__n_components":[2,3,4]}
    clf_list.append( (clf_rbm, params_rbm) )

    return clf_list
Project: yttresearch-machine-learning-algorithms-analysis | Author: gdemos01
def exportPresentationData(classifier,action):
        dir = input('Give Data Directory: ')

        if int(classifier)==1:
                clf = GradientBoostingClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 2:
                clf = LogisticRegression()
                classify(dir,clf,action)
        elif int(classifier) == 3:
                clf = KNeighborsClassifier(n_neighbors=5)
                classify(dir,clf,action)
        elif int(classifier) == 4:
                clf = DecisionTreeClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 5:
                clf = svm.LinearSVC()
                classify_type2(dir,clf,action)
        elif int(classifier) == 6:
                clf = RandomForestClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 7:
                clf = ExtraTreesClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 8:
                clf = IsolationForest()
                classify_type2(dir,clf,action)
        elif int(classifier) == 9:
                clf = AdaBoostClassifier(n_estimators=100)
                classify(dir,clf,action)
        elif int(classifier) == 10:
                clf = BaggingClassifier(DecisionTreeClassifier())
                classify(dir,clf,action)
        elif int(classifier) == 11:
                clf1 = GradientBoostingClassifier()
                clf2 = AdaBoostClassifier()
                clf = VotingClassifier(estimators=[('abdt', clf1), ('gbdt', clf2)], voting='soft')
                classify(dir,clf,action)
Project: yttresearch-machine-learning-algorithms-analysis | Author: gdemos01
def exportPresentationData(classifier,action,dir):

        if int(classifier)==1:
                clf = GradientBoostingClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 2:
                clf = LogisticRegression()
                classify(dir,clf,action)
        elif int(classifier) == 3:
                clf = KNeighborsClassifier(n_neighbors=5)
                classify(dir,clf,action)
        elif int(classifier) == 4:
                clf = DecisionTreeClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 5:
                clf = svm.LinearSVC()
                classify_type2(dir,clf,action)
        elif int(classifier) == 6:
                clf = RandomForestClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 7:
                clf = ExtraTreesClassifier()
                classify(dir,clf,action)
        elif int(classifier) == 8:
                clf = IsolationForest()
                classify_type2(dir,clf,action)
        elif int(classifier) == 9:
                clf = AdaBoostClassifier(n_estimators=100)
                classify(dir,clf,action)
        elif int(classifier) == 10:
                clf = BaggingClassifier(DecisionTreeClassifier())
                classify(dir,clf,action)
        elif int(classifier) == 11:
                clf1 = GradientBoostingClassifier()
                clf2 = AdaBoostClassifier()
                clf = VotingClassifier(estimators=[('abdt', clf1), ('gbdt', clf2)], voting='soft')
                classify(dir,clf,action)
Project: fake-news-detection | Author: aldengolab
def define_clfs_params(self):
        '''
        Defines all relevant parameters and classes for classfier objects.
        Edit these if you wish to change parameters.
        '''
        # These are the classifiers
        self.clfs = {
            'RF': RandomForestClassifier(n_estimators = 50, n_jobs = -1),
            'ET': ExtraTreesClassifier(n_estimators = 10, n_jobs = -1, criterion = 'entropy'),
            'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth = 1), algorithm = "SAMME", n_estimators = 200),
            'LR': LogisticRegression(penalty = 'l1', C = 1e5),
            'SVM': svm.SVC(kernel = 'linear', probability = True, random_state = 0),
            'GB': GradientBoostingClassifier(learning_rate = 0.05, subsample = 0.5, max_depth = 6, n_estimators = 10),
            'NB': GaussianNB(),
            'DT': DecisionTreeClassifier(),
            'SGD': SGDClassifier(loss = 'log', penalty = 'l2'),
            'KNN': KNeighborsClassifier(n_neighbors = 3)
            }
        # These are the parameters which will be run through
        self.params = {
             'RF':{'n_estimators': [1,10,100,1000], 'max_depth': [10, 15,20,30,40,50,60,70,100], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'LR': {'penalty': ['l1','l2'], 'C': [0.00001,0.0001,0.001,0.01,0.1,1,10], 'random_state': [1]},
             'SGD': {'loss': ['log'], 'penalty': ['l2','l1','elasticnet'], 'random_state': [1]},
             'ET': {'n_estimators': [1,10,100,1000], 'criterion' : ['gini', 'entropy'], 'max_depth': [1,3,5,10,15], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1,10,100,1000], 'random_state': [1]},
             'GB': {'n_estimators': [1,10,100,1000], 'learning_rate' : [0.001,0.01,0.05,0.1,0.5],'subsample' : [0.1,0.5,1.0], 'max_depth': [1,3,5,10,20,50,100], 'random_state': [1]},
             'NB': {},
             'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1,2,15,20,30,40,50], 'max_features': ['sqrt','log2'],'min_samples_split': [2,5,10], 'random_state': [1]},
             'SVM' :{'C' :[0.00001,0.0001,0.001,0.01,0.1,1,10],'kernel':['linear'], 'random_state': [1]},
             'KNN' :{'n_neighbors': [1,5,10,25,50,100],'weights': ['uniform','distance'],'algorithm': ['auto','ball_tree','kd_tree']}
             }
Project: Image_Retrieval | Author: ddlricardo
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    clf = ensemble.AdaBoostClassifier(naive_bayes.GaussianNB())
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
Project: Image_Retrieval | Author: ddlricardo
def runner(i):
    sem.acquire()
    print("learn begin %s" % i)
    # LinearSVC has no predict_proba, so boost with the discrete SAMME algorithm
    clf = ensemble.AdaBoostClassifier(svm.LinearSVC(), algorithm="SAMME")
    clf = clf.fit(traindata, trainlabel[i])
    svms.append((i, clf))
    result[i] = clf.predict_proba(testdata)
    dbresult[i] = clf.predict_proba(dbdata)
    #print("label %s done\n%s"
    # % (i, metrics.classification_report(testlabel[i], result[i])))
    #print metrics.confusion_matrix(testlabel[i], result)
    sem.release()
Project: genrec | Author: kkanellis
def __init__(self, genres, data, type='knn', name='', clf_kwargs=None):
        self.logger = get_logger('classifier')
        self.display_name = name

        self.genres = genres
        self.m_genres = { genre:i for i, genre in enumerate(genres) }
        self.randstate = np.random.RandomState()
        self.scaler = StandardScaler()

        clf_kwargs = { } if not clf_kwargs else clf_kwargs
        if type in ['svm', 'mlp']:
            clf_kwargs['random_state'] = self.randstate

        if type == 'knn':
            self.proto_clf = KNeighborsClassifier(**clf_kwargs)
        elif type == 'svm':
            self.proto_clf = SVC(**clf_kwargs)
        elif type == 'dtree':
            self.proto_clf = DecisionTreeClassifier(**clf_kwargs)
        elif type == 'gnb':
            self.proto_clf = GaussianNB(**clf_kwargs)
        elif type == 'perc':
            self.proto_clf = Perceptron(**clf_kwargs)
        elif type == 'mlp':
            self.proto_clf = MLPClassifier(**clf_kwargs)
        elif type == 'ada':
            self.proto_clf = AdaBoostClassifier(**clf_kwargs)
        else:
            raise LookupError('Classifier type "{}" is invalid'.format(type))

        self._convert_data(data)

        self.logger.info('Classifier: {} (params={})'.format(
            self.proto_clf.__class__.__name__,
            clf_kwargs
        ))
Project: nlp_experiments | Author: amirhdrz
def train_model(text_matrix, categories):
    # model = AdaBoostClassifier(
    #     DecisionTreeClassifier(max_depth=3),
    #     n_estimators=500,
    #     algorithm="SAMME")

    model = RandomForestClassifier(n_estimators=100, max_depth=8)

    model.fit(text_matrix, categories)

    return model
Project: anomalous-vertices-detection | Author: Kagandi
def set_adaboost_classifier(self):
        return SkLearner(ensemble.AdaBoostClassifier())
Project: sia-cog | Author: deepakkumar1984
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
Project: ML-note | Author: JasonK93
def test_AdaBoostClassifier_base_classifier(*data):
    '''
    test Adaboost classifier with different number of classifier, and category of classifier
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    from sklearn.naive_bayes import GaussianNB
    X_train,X_test,y_train,y_test=data
    fig=plt.figure()
    ax=fig.add_subplot(2,1,1)

    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Decision Tree")

    ax=fig.add_subplot(2,1,2)
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1,base_estimator=GaussianNB())
    clf.fit(X_train,y_train)
    ## graph
    estimators_num=len(clf.estimators_)
    X=range(1,estimators_num+1)
    ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
    ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
    ax.set_xlabel("estimator num")
    ax.set_ylabel("score")
    ax.legend(loc="lower right")
    ax.set_ylim(0,1)
    ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
    plt.show()
Project: ML-note | Author: JasonK93
def test_AdaBoostClassifier_algorithm(*data):
    '''
    test performance with different algorithm
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train,X_test,y_train,y_test=data
    algorithms=['SAMME.R','SAMME']
    fig=plt.figure()
    learning_rates=[0.05,0.1,0.5,0.9]
    for i,learning_rate in enumerate(learning_rates):
        ax=fig.add_subplot(2,2,i+1)
        for i, algorithm in enumerate(algorithms):
            clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,
                algorithm=algorithm)
            clf.fit(X_train,y_train)
            ## graph
            estimators_num=len(clf.estimators_)
            X=range(1,estimators_num+1)
            ax.plot(list(X),list(clf.staged_score(X_train,y_train)),
                label="%s:Training score"%algorithms[i])
            ax.plot(list(X),list(clf.staged_score(X_test,y_test)),
                label="%s:Testing score"%algorithms[i])
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_title("learning rate:%f"%learning_rate)
    fig.suptitle("AdaBoostClassifier")
    plt.show()
Project: sport-news-retrieval | Author: Andyccs
def ensemble_classify():
  label_list = get_labels()
  tweet_list = get_labelled_tweets()
  # vectorise using tf-idf
  vectoriser = TfidfVectorizer(min_df=3,
                               max_features=None,
                               strip_accents='unicode',
                               analyzer='word',
                               token_pattern=r'\w{1,}',
                               ngram_range=(1, 2),
                               use_idf=1,
                               smooth_idf=1,
                               sublinear_tf=1,)

  ## do transformation into vector
  vectoriser.fit(tweet_list)
  vectorised_tweet_list = vectoriser.transform(tweet_list)
  train_vector, test_vector, train_labels, test_labels = train_test_split(vectorised_tweet_list,
                                                                          label_list,
                                                                          test_size=0.8,
                                                                          random_state=42)

  n_estimators = 10  # number of weak learners
  model = AdaBoostClassifier(n_estimators=n_estimators)
  ada_classifier = model.fit(train_vector, train_labels)
  result = ada_classifier.predict(test_vector)

  # output result to csv
  create_directory('data')
  result.tofile("data/tfidf_ada.csv", sep=',')
  save_model(ada_classifier, 'tfidf_ada')

  # evaluation
  binarise_result = label_binarize(result, classes=class_list)
  binarise_labels = label_binarize(test_labels, classes=class_list)
  generate_eval_metrics(binarise_result, 'tfidf_ada', binarise_labels)
Project: Parallel-SGD | Author: angadgill
def test_classification_toy():
    # Check classification on a toy dataset.
    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg, random_state=0)
        clf.fit(X, y_class)
        assert_array_equal(clf.predict(T), y_t_class)
        assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_)
        assert_equal(clf.predict_proba(T).shape, (len(T), 2))
        assert_equal(clf.decision_function(T).shape, (len(T),))
Project: Parallel-SGD | Author: angadgill
def test_iris():
    # Check consistency on dataset iris.
    classes = np.unique(iris.target)
    clf_samme = prob_samme = None

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(iris.data, iris.target)

        assert_array_equal(classes, clf.classes_)
        proba = clf.predict_proba(iris.data)
        if alg == "SAMME":
            clf_samme = clf
            prob_samme = proba
        assert_equal(proba.shape[1], len(classes))
        assert_equal(clf.decision_function(iris.data).shape[1], len(classes))

        score = clf.score(iris.data, iris.target)
        assert score > 0.9, "Failed with algorithm %s and score = %f" % \
            (alg, score)

    # Somewhat hacky regression test: prior to
    # ae7adc880d624615a34bafdb1d75ef67051b8200,
    # predict_proba returned SAMME.R values for SAMME.
    clf_samme.algorithm = "SAMME.R"
    assert_array_less(0,
                      np.abs(clf_samme.predict_proba(iris.data) - prob_samme))
Project: Parallel-SGD | Author: angadgill
def test_pickle():
    # Check pickability.
    import pickle

    # Adaboost classifier
    for alg in ['SAMME', 'SAMME.R']:
        obj = AdaBoostClassifier(algorithm=alg)
        obj.fit(iris.data, iris.target)
        score = obj.score(iris.data, iris.target)
        s = pickle.dumps(obj)

        obj2 = pickle.loads(s)
        assert_equal(type(obj2), obj.__class__)
        score2 = obj2.score(iris.data, iris.target)
        assert_equal(score, score2)

    # Adaboost regressor
    obj = AdaBoostRegressor(random_state=0)
    obj.fit(boston.data, boston.target)
    score = obj.score(boston.data, boston.target)
    s = pickle.dumps(obj)

    obj2 = pickle.loads(s)
    assert_equal(type(obj2), obj.__class__)
    score2 = obj2.score(boston.data, boston.target)
    assert_equal(score, score2)
Project: Parallel-SGD | Author: angadgill
def test_error():
    # Test that it gives proper exception on deficient input.
    assert_raises(ValueError,
                  AdaBoostClassifier(learning_rate=-1).fit,
                  X, y_class)

    assert_raises(ValueError,
                  AdaBoostClassifier(algorithm="foo").fit,
                  X, y_class)

    assert_raises(ValueError,
                  AdaBoostClassifier().fit,
                  X, y_class, sample_weight=np.asarray([-1]))
Project: Parallel-SGD | Author: angadgill
def test_base_estimator():
    # Test different base estimators.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import SVC

    # XXX doesn't work with y_class because RF doesn't support classes_
    # Shouldn't AdaBoost run a LabelBinarizer?
    clf = AdaBoostClassifier(RandomForestClassifier())
    clf.fit(X, y_regr)

    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    clf.fit(X, y_class)

    from sklearn.ensemble import RandomForestRegressor
    from sklearn.svm import SVR

    clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0)
    clf.fit(X, y_regr)

    clf = AdaBoostRegressor(SVR(), random_state=0)
    clf.fit(X, y_regr)

    # Check that an empty discrete ensemble fails in fit, not predict.
    X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
    y_fail = ["foo", "bar", 1, 2]
    clf = AdaBoostClassifier(SVC(), algorithm="SAMME")
    assert_raises_regexp(ValueError, "worse than random",
                         clf.fit, X_fail, y_fail)
Project: Parallel-SGD | Author: angadgill
def test_sample_weight_missing():
    from sklearn.linear_model import LogisticRegression
    from sklearn.cluster import KMeans

    clf = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, clf.fit, X, y_regr)

    clf = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, clf.fit, X, y_regr)