Python sklearn.ensemble 模块,RandomForestClassifier() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.ensemble.RandomForestClassifier()

项目:triage    作者:dssg    | 项目源码 | 文件源码
def trained_models():
    """Fit a handful of classifiers on the breast-cancer dataset.

    The data is split 70/30 with a fixed seed; only the training part is
    used here.  Returns a dict mapping a short label to each fitted
    estimator.
    """
    cancer = datasets.load_breast_cancer()
    X_train, _, y_train, _ = train_test_split(
        cancer.data, cancer.target, test_size=0.3, random_state=12345)

    # Each estimator is created and fitted in turn, in this order.
    recipes = (
        ('RF', RandomForestClassifier),
        ('LR', LogisticRegression),
        ('SVC_w_linear_kernel', lambda: SVC(kernel='linear')),
        ('SVC_wo_linear_kernel', SVC),
        ('Dummy', DummyClassifier),
    )
    fitted = {}
    for label, make_estimator in recipes:
        estimator = make_estimator()
        estimator.fit(X_train, y_train)
        fitted[label] = estimator

    key_order = ('RF', 'LR', 'SVC_w_linear_kernel', 'Dummy',
                 'SVC_wo_linear_kernel')
    return {label: fitted[label] for label in key_order}
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    """Return the feature-selection step registered under the given names.

    All candidates are instantiated on every call; the one stored under
    ``[type_of_estimator][model_name]`` is returned.  The 'KeepAll' entries
    are the plain string 'KeepAll' rather than an estimator.  An unknown
    ``type_of_estimator`` or ``model_name`` raises ``KeyError``.
    """
    classifier_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='20*mean'),
        'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'KeepAll': 'KeepAll',
    }
    regressor_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='0.7*mean'),
        'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'KeepAll': 'KeepAll',
    }
    selectors_by_type = {
        'classifier': classifier_selectors,
        'regressor': regressor_selectors,
    }

    return selectors_by_type[type_of_estimator][model_name]
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def get_feature_importance(self, clf, model_name):
        """Return the fitted model's importances or coefficients as a list.

        ``model_name`` selects which attribute of ``clf`` is read:
        ``feature_importances_``, ``coef_`` (converted with ``tolist()``),
        or nothing at all, in which case ``None`` is returned.  A
        ``model_name`` missing from the table raises ``KeyError``.
        """
        attribute_for_model = {
            'RandomForestClassifier': 'feature_importances',
            'ExtraTreesClassifier': 'feature_importances',
            'AdaBoostClassifier': 'feature_importances',
            'LogisticRegression': 'coef',
            'svm.SVC': 'coef',
            'GradientBoostingClassifier': 'feature_importances',
            'GaussianNB': None,
            'DecisionTreeClassifier': 'feature_importances',
            'SGDClassifier': 'coef',
            'KNeighborsClassifier': None,
            'linear.SVC': 'coef',
        }

        source = attribute_for_model[model_name]
        if source == 'feature_importances':
            return list(clf.feature_importances_)
        if source == 'coef':
            return list(clf.coef_.tolist())
        return None
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_improvement(self):
        """Hyperopt search over max_depth must beat the starting forest.

        Both scores are computed on the same data the models were fitted
        on, and every recorded trial must have status 'ok'.
        """
        np.random.seed(4)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = HyperoptOptimizer(baseline, [depth], clf_score)
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels,
                                           n_iters=10)
        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)

        for trial_status in optimizer.trials.statuses():
            self.assertEqual(trial_status, 'ok')
项目:auto_ml    作者:doordash    | 项目源码 | 文件源码
def get_feature_selection_model_from_name(type_of_estimator, model_name):
    """Return the feature-selection step registered under the given names.

    All candidates are instantiated on every call; the one stored under
    ``[type_of_estimator][model_name]`` is returned.  The 'KeepAll' entries
    are the plain string 'KeepAll' rather than an estimator.  An unknown
    ``type_of_estimator`` or ``model_name`` raises ``KeyError``.
    """
    classifier_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestClassifier(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='20*mean'),
        'RFECV': RFECV(estimator=RandomForestClassifier(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'RandomizedSparse': RandomizedLogisticRegression(),
        'KeepAll': 'KeepAll',
    }
    regressor_selectors = {
        'SelectFromModel': SelectFromModel(
            RandomForestRegressor(n_jobs=-1, max_depth=10, n_estimators=15),
            threshold='0.7*mean'),
        'RFECV': RFECV(estimator=RandomForestRegressor(n_jobs=-1), step=0.1),
        'GenericUnivariateSelect': GenericUnivariateSelect(),
        'RandomizedSparse': RandomizedLasso(),
        'KeepAll': 'KeepAll',
    }
    selectors_by_type = {
        'classifier': classifier_selectors,
        'regressor': regressor_selectors,
    }

    return selectors_by_type[type_of_estimator][model_name]
项目:rltk    作者:usc-isi-i2    | 项目源码 | 文件源码
def get_classifier_class(class_name):
    """Map a short classifier name to its estimator class.

    Returns the class itself (not an instance).  Raises ``ValueError`` when
    ``class_name`` is not one of the supported names.
    """
    supported = {
        'svm': SVC,
        'k_neighbors': KNeighborsClassifier,
        'gaussian_process': GaussianProcessClassifier,
        'decision_tree': DecisionTreeClassifier,
        'random_forest': RandomForestClassifier,
        'ada_boost': AdaBoostClassifier,
        'mlp': MLPClassifier,
        'gaussian_naive_bayes': GaussianNB,
        'quadratic_discriminant_analysis': QuadraticDiscriminantAnalysis,
    }

    # No table value is None, so None reliably signals a missing name.
    classifier_class = supported.get(class_name)
    if classifier_class is None:
        raise ValueError('No such classifier')
    return classifier_class
项目:oss-github-analysis-project    作者:itu-oss-project-team    | 项目源码 | 文件源码
def __create_classifiers(self):
        """Build the list of classifiers to evaluate.

        Each entry is a dict with the estimator instance under "func" and a
        short label under "name".
        """
        classifiers = [
            {"func": linear_model.SGDClassifier(loss="log"), "name": "sgd"},
        ]
        # k-nearest-neighbours with distance weighting for k = 1, 3, 5.
        for k in (1, 3, 5):
            classifiers.append(
                {"func": neighbors.KNeighborsClassifier(k, weights='distance'),
                 "name": "knn" + str(k)})
        classifiers.append({"func": GaussianNB(), "name": "naive_bayes"})

        # Currently disabled candidates:
        # classifiers.append({"func": tree.DecisionTreeClassifier(), "name": "decision_tree"})
        # classifiers.append({"func": MLPClassifier(max_iter=10000), "name": "mlp"})
        # classifiers.append({"func": RandomForestClassifier(), "name": "random_forest"})
        return classifiers
项目:johnson-county-ddj-public    作者:dssg    | 项目源码 | 文件源码
def define_model(self, model, parameters, n_cores = 0):
        """Instantiate the estimator named ``model`` and apply ``parameters``.

        Every supported estimator is built with its baseline settings on
        each call; the requested one then receives ``parameters`` through
        ``set_params`` and is returned.  Raises ``ConfigError`` for an
        unsupported name.  ``n_cores`` is accepted but not used here; the
        parallel estimators are created with ``n_jobs=7``.
        """
        baseline_estimators = {
            'RandomForestClassifier': RandomForestClassifier(
                n_estimators=50, n_jobs=7),
            'ExtraTreesClassifier': ExtraTreesClassifier(
                n_estimators=10, n_jobs=7, criterion='entropy'),
            'AdaBoostClassifier': AdaBoostClassifier(
                DecisionTreeClassifier(max_depth=1), algorithm="SAMME",
                n_estimators=200),
            'LogisticRegression': LogisticRegression(penalty='l1', C=1e5),
            'svm.SVC': svm.SVC(kernel='linear', probability=True,
                               random_state=0),
            'GradientBoostingClassifier': GradientBoostingClassifier(
                learning_rate=0.05, subsample=0.5, max_depth=6,
                n_estimators=10),
            'GaussianNB': GaussianNB(),
            'DecisionTreeClassifier': DecisionTreeClassifier(),
            'SGDClassifier': SGDClassifier(loss="hinge", penalty="l2",
                                           n_jobs=7),
            'KNeighborsClassifier': KNeighborsClassifier(n_neighbors=3),
            'linear.SVC': svm.LinearSVC(),
        }

        if model in baseline_estimators:
            chosen = baseline_estimators[model]
            chosen.set_params(**parameters)
            return chosen

        raise ConfigError("Unsupported model {}".format(model))
项目:Stock-Market-Prediction    作者:Diptiranjan1    | 项目源码 | 文件源码
def do_ml(ticker):
    """Train a voting ensemble for ``ticker`` and report its accuracy.

    Feature sets come from ``extract_featuresets``; 25% of the samples are
    held out.  Prints the held-out accuracy and the count of each predicted
    class, then returns the accuracy.
    """
    X, y, df = extract_featuresets(ticker)

    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        X, y, test_size=0.25)

    # Ensemble of a linear SVM, k-nearest-neighbours and a random forest.
    voters = [
        ('lsvc', svm.LinearSVC()),
        ('knn', neighbors.KNeighborsClassifier()),
        ('rfor', RandomForestClassifier()),
    ]
    clf = VotingClassifier(voters)
    clf.fit(X_train, y_train)

    confidence = clf.score(X_test, y_test)
    print('accuracy:',confidence)

    predicted_counts = Counter(clf.predict(X_test))
    print('predicted class counts:',predicted_counts)
    print()
    print()
    return confidence

# examples of running:
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_forests():
    """Fit five random forests with randomly drawn settings and report them.

    Uses X_train, Y_train, X_test and Y_test from the surrounding scope.
    For each draw the settings and the train/test ``zero_one_score`` are
    printed; finally the settings with the smallest test score are printed.
    """
    print('random forest: \n')
    tried_settings = []
    test_scores = []

    for _ in range(5):
        max_features = np.random.randint(400,800)
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        fitted = RandomForestClassifier(
            n_estimators=50,
            max_features=max_features,
            max_depth=max_depth).fit(X_train, Y_train)
        pred = fitted.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))

        train_score = zero_one_score(Y_train, fitted.predict(X_train))
        test_score = zero_one_score(Y_test, pred)
        print('forest train: ', train_score, ' test: ', test_score)

        tried_settings.append((max_features, max_depth))
        test_scores.append(test_score)

    # NOTE(review): "best" is the draw with the LOWEST score; confirm that
    # zero_one_score is a loss (lower is better) and not an accuracy.
    print('best:', tried_settings[np.argmin(test_scores)])
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_forests():
    """Fit five random forests with randomly drawn settings and report them.

    Uses X_train, Y_train, X_test and Y_test from the surrounding scope.
    For each draw the settings and the train/test ``zero_one_score`` are
    printed; finally the settings with the smallest test score are printed.
    """
    print('random forest: \n')
    tried_settings = []
    test_scores = []

    for _ in range(5):
        max_features = np.random.randint(400,800)
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        fitted = RandomForestClassifier(
            n_estimators=50,
            max_features=max_features,
            max_depth=max_depth).fit(X_train, Y_train)
        pred = fitted.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))

        train_score = zero_one_score(Y_train, fitted.predict(X_train))
        test_score = zero_one_score(Y_test, pred)
        print('forest train: ', train_score, ' test: ', test_score)

        tried_settings.append((max_features, max_depth))
        test_scores.append(test_score)

    # NOTE(review): "best" is the draw with the LOWEST score; confirm that
    # zero_one_score is a loss (lower is better) and not an accuracy.
    print('best:', tried_settings[np.argmin(test_scores)])
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_forests():
    """Fit five random forests with randomly drawn settings and report them.

    Uses X_train, Y_train, X_test and Y_test from the surrounding scope.
    For each draw the settings and the train/test ``zero_one_score`` are
    printed; finally the settings with the smallest test score are printed.
    """
    print('random forest: \n')
    tried_settings = []
    test_scores = []

    for _ in range(5):
        max_features = np.random.randint(400,800)
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        fitted = RandomForestClassifier(
            n_estimators=50,
            max_features=max_features,
            max_depth=max_depth).fit(X_train, Y_train)
        pred = fitted.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))

        train_score = zero_one_score(Y_train, fitted.predict(X_train))
        test_score = zero_one_score(Y_test, pred)
        print('forest train: ', train_score, ' test: ', test_score)

        tried_settings.append((max_features, max_depth))
        test_scores.append(test_score)

    # NOTE(review): "best" is the draw with the LOWEST score; confirm that
    # zero_one_score is a loss (lower is better) and not an accuracy.
    print('best:', tried_settings[np.argmin(test_scores)])
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_forests():
    """Fit five random forests with randomly drawn settings and report them.

    Uses X_train, Y_train, X_test and Y_test from the surrounding scope.
    For each draw the settings and the train/test ``zero_one_score`` are
    printed; finally the settings with the smallest test score are printed.
    """
    print('random forest: \n')
    tried_settings = []
    test_scores = []

    for _ in range(5):
        max_features = np.random.randint(400,800)
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        fitted = RandomForestClassifier(
            n_estimators=50,
            max_features=max_features,
            max_depth=max_depth).fit(X_train, Y_train)
        pred = fitted.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))

        train_score = zero_one_score(Y_train, fitted.predict(X_train))
        test_score = zero_one_score(Y_test, pred)
        print('forest train: ', train_score, ' test: ', test_score)

        tried_settings.append((max_features, max_depth))
        test_scores.append(test_score)

    # NOTE(review): "best" is the draw with the LOWEST score; confirm that
    # zero_one_score is a loss (lower is better) and not an accuracy.
    print('best:', tried_settings[np.argmin(test_scores)])
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_forests():
    """Fit five random forests with randomly drawn settings and report them.

    Uses X_train, Y_train, X_test and Y_test from the surrounding scope.
    For each draw the settings and the train/test ``zero_one_score`` are
    printed; finally the settings with the smallest test score are printed.
    """
    print('random forest: \n')
    tried_settings = []
    test_scores = []

    for _ in range(5):
        max_features = np.random.randint(400,800)
        max_depth = np.random.choice([None, None, None, None, 30, 40, 60])
        fitted = RandomForestClassifier(
            n_estimators=50,
            max_features=max_features,
            max_depth=max_depth).fit(X_train, Y_train)
        pred = fitted.predict(X_test)
        print('\n params:', dict(max_features=max_features, max_depth=max_depth))

        train_score = zero_one_score(Y_train, fitted.predict(X_train))
        test_score = zero_one_score(Y_test, pred)
        print('forest train: ', train_score, ' test: ', test_score)

        tried_settings.append((max_features, max_depth))
        test_scores.append(test_score)

    # NOTE(review): "best" is the draw with the LOWEST score; confirm that
    # zero_one_score is a loss (lower is better) and not an accuracy.
    print('best:', tried_settings[np.argmin(test_scores)])
项目:SourceFilterContoursMelody    作者:juanjobosch    | 项目源码 | 文件源码
def train_clf(x_train, y_train, best_depth):
    """ Train classifier.

    Fits a 100-tree forest that considers all features at every split
    (``max_features=None``), uses all available cores (``n_jobs=-1``) and
    weights classes inversely to their frequency.

    Parameters
    ----------
    x_train : np.array [n_samples, n_features]
        Training features.
    y_train : np.array [n_samples]
        Training labels
    best_depth : int
        Optimal max_depth parameter

    Returns
    -------
    clf : classifier
        Trained scikit-learn classifier
    """
    # 'balanced' replaces the former 'auto' spelling, which scikit-learn
    # deprecated in 0.17 and later removed.
    clf = RFC(n_estimators=100, max_depth=best_depth, n_jobs=-1,
              class_weight='balanced', max_features=None)
    clf = clf.fit(x_train, y_train)
    return clf
项目:easyML    作者:aarshayj    | 项目源码 | 文件源码
def __init__(
        self,data_block, predictors=None,cv_folds=10,
        scoring_metric='accuracy',additional_display_metrics=None):
        """Set up a random-forest classification model.

        Delegates to ``base_classification.__init__`` with a fresh
        ``RandomForestClassifier`` as the algorithm, then prepares
        ``model_output`` and resets the parameters to their defaults.

        ``predictors`` and ``additional_display_metrics`` default to empty
        lists; ``None`` is used as the sentinel so that a new list is
        created per instance instead of sharing one mutable default.
        """
        if predictors is None:
            predictors = []
        if additional_display_metrics is None:
            additional_display_metrics = []

        base_classification.__init__(
            self, alg=RandomForestClassifier(), data_block=data_block,
            predictors=predictors,cv_folds=cv_folds,
            scoring_metric=scoring_metric,
            additional_display_metrics=additional_display_metrics
            )

        # Start from the default parameters and add placeholder entries.
        self.model_output = pd.Series(self.default_parameters)
        self.model_output['Feature_Importance'] = "-"
        self.model_output['OOB_Score'] = "-"

        #Set parameters to default values:
        self.set_parameters(set_default=True)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_improvement(self):
        """Grid search over max_depth must beat the starting forest.

        Both scores are computed on the same data the models were fitted on.
        """
        np.random.seed(4)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = GridSearchOptimizer(baseline, [depth], clf_score,
                                        {'max_depth': 5})
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels)
        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_objective_function(self):
        """The objective must score 1 on a dataset built to be easy to fit.

        The same data is passed as both the training and the evaluation set.
        """
        np.random.seed(4)
        dataset_spec = dict(n_samples=100, n_features=10, n_informative=10,
                            n_redundant=0, class_sep=100,
                            n_clusters_per_class=1, flip_y=0.0)
        features, labels = make_classification(**dataset_spec)

        forest = RandomForestClassifier(max_depth=5)
        forest.fit(features, labels)

        bound_objective = partial(objective, forest, 'sklearn', clf_score,
                                  features, labels, features, labels)
        # model should fit the data perfectly
        outcome = bound_objective(forest.get_params())
        final_score = outcome[0]
        self.assertEqual(final_score, 1)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_expected_improvement_tractable(self):
        """Bayesian search with 'expected_improvement' must succeed and improve.

        The optimizer must report success, and the tuned model must score
        higher than the starting forest on the data both were fitted on.
        """
        np.random.seed(5)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = BayesianOptimizer(baseline, [depth], clf_score,
                                      method='expected_improvement')
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels,
                                           n_iters=10)
        self.assertTrue(optimizer.success)

        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_probability_of_improvement_tractable(self):
        """Bayesian search with 'probability_of_improvement' must succeed and improve.

        The optimizer must report success, and the tuned model must score
        higher than the starting forest on the data both were fitted on.
        """
        np.random.seed(5)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = BayesianOptimizer(baseline, [depth], clf_score,
                                      method='probability_of_improvement')
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels,
                                           n_iters=10)
        self.assertTrue(optimizer.success)

        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_upper_confidence_bound_tractable(self):
        """Bayesian search with 'upper_confidence_bound' must succeed and improve.

        The optimizer must report success, and the tuned model must score
        higher than the starting forest on the data both were fitted on.
        """
        np.random.seed(5)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = BayesianOptimizer(baseline, [depth], clf_score,
                                      method='upper_confidence_bound')
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels,
                                           n_iters=10)
        self.assertTrue(optimizer.success)

        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)
项目:OptML    作者:johannespetrat    | 项目源码 | 文件源码
def test_improvement(self):
        """Random search over max_depth must beat the starting forest.

        Both scores are computed on the same data the models were fitted on.
        """
        np.random.seed(4)
        dataset_spec = dict(n_samples=100, n_features=45, n_informative=15,
                            n_redundant=5, class_sep=1,
                            n_clusters_per_class=4, flip_y=0.4)
        features, labels = make_classification(**dataset_spec)

        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        baseline_score = clf_score(labels, baseline.predict(features))

        depth = Parameter('max_depth', 'integer', lower=1, upper=10)
        optimizer = RandomSearchOptimizer(baseline, [depth], clf_score)
        best_params, tuned = optimizer.fit(X_train=features, y_train=labels,
                                           n_iters=10)
        tuned.fit(features, labels)
        tuned_score = clf_score(labels, tuned.predict(features))
        self.assertTrue(tuned_score > baseline_score)
项目:stacker    作者:bamine    | 项目源码 | 文件源码
def __init__(self, task: Task, scorer: Scorer, opt_logger: OptimizationLogger=VoidLogger(None)):
        """Pick the random-forest model and search space matching the task.

        A task whose ``task`` attribute equals "classification" gets a
        ``RandomForestClassifier`` with the classification space; any other
        task gets a ``RandomForestRegressor`` with the regression space.
        Everything is then handed to the parent initializer.
        """
        is_classification = task.task == "classification"
        space = (RandomForestOptimizer.Params.classification_space
                 if is_classification
                 else RandomForestOptimizer.Params.regression_space)
        model = (ensemble.RandomForestClassifier()
                 if is_classification
                 else ensemble.RandomForestRegressor())
        super().__init__(model, task, space, scorer, opt_logger)
项目:XTREE    作者:ai-se    | 项目源码 | 文件源码
def learns(tests,trains,indep=lambda x: x[:-1],
                    dep = lambda x: x[-1],
                    rf  = Abcd(),
                    lg  = Abcd(),
                    dt  = Abcd(),
                    nb  = Abcd()):
  """Train four learners and feed every prediction to its recorder.

  ``trainTest`` supplies the train/test split.  The learners are fitted and
  evaluated in this order: random forest (``rf``), logistic regression
  (``lg``), Gaussian naive Bayes (``nb``), decision tree (``dt``).  Each
  recorder is called with ``predicted`` and ``actual`` for every test row.
  Nothing is returned.
  """
  x1, y1, x2, y2 = trainTest(tests, trains, indep, dep)

  # (factory, recorder) pairs; a learner is built only when its turn comes.
  contenders = (
    (lambda: RandomForestClassifier(n_estimators = 50), rf),
    (lambda: linear_model.LogisticRegression(C=1e5), lg),
    (GaussianNB, nb),
    (lambda: DecisionTreeClassifier(criterion="entropy", random_state=1), dt),
  )
  for build, record in contenders:
    learner = build()
    learner.fit(x1, y1)
    for n, got in enumerate(learner.predict(x2)):
      record(predicted = got, actual = y2[n])
项目:XTREE    作者:ai-se    | 项目源码 | 文件源码
def rforest(train, test, tunings=None, smoteit=True, duplicate=True):
  """Predict the class of every ``test`` row with a random forest.

  train, test : data sets accepted by ``formatData`` (and ``SMOTE``).
  tunings     : optional sequence ``[n_estimators, max_features as a
                percentage, min_samples_leaf, min_samples_split]``; when
                falsy a 100-tree forest with ``random_state=1`` is used.
  smoteit     : when true, ``train`` is first resampled with ``SMOTE``.
  duplicate   : forwarded to ``SMOTE`` as ``resample``.

  All columns except the last two are used as features and the
  second-to-last column is the class label.  Returns the predictions.
  """
  # Apply random forest Classifier to predict the number of bugs.
  if smoteit:
    train = SMOTE(train, atleast=50, atmost=101, resample=duplicate)
  if not tunings:
    clf = RandomForestClassifier(n_estimators=100, random_state=1)
  else:
    clf = RandomForestClassifier(n_estimators=int(tunings[0]),
                                 # Divide by a float: under Python 2 an
                                 # integer percentage would truncate to 0.
                                 max_features=tunings[1] / 100.0,
                                 min_samples_leaf=int(tunings[2]),
                                 min_samples_split=int(tunings[3])
                                 )
  train_DF = formatData(train)
  test_DF = formatData(test)
  features = train_DF.columns[:-2]
  klass = train_DF[train_DF.columns[-2]]
  clf.fit(train_DF[features], klass)
  preds = clf.predict(test_DF[test_DF.columns[:-2]])
  return preds
项目:MLBox    作者:AxeldeRomblay    | 项目源码 | 文件源码
def __init__(self,
                 estimator=RandomForestClassifier(n_estimators=50,
                                                  n_jobs=-1,
                                                  max_features=1.,
                                                  min_samples_leaf=5,
                                                  max_depth=5),
                 n_folds=2,
                 stratify=True,
                 random_state=1):
        """Store the configuration and reset the private fit state.

        ``estimator``, ``n_folds``, ``stratify`` and ``random_state`` are
        kept as given.  The private cv / prediction / target slots start as
        ``None`` and the fitted flag starts as ``False``.
        """
        # Private state, filled in later.
        self.__cv = self.__pred = self.__target = None
        self.__fitOK = False

        # Public configuration.
        self.estimator, self.n_folds = estimator, n_folds
        self.stratify, self.random_state = stratify, random_state
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_stacked_classfier_extkfold(self):
        """StackedClassifier with an externally supplied fold object must score above 0.9 on iris."""
        meta_learner = LogisticRegression(random_state=1)
        base_learners = [
            RandomForestClassifier(n_estimators=40, criterion='gini',
                                   random_state=1),
            RidgeClassifier(random_state=1),
        ]
        folds = StratifiedKFold(self.iris.target, 3)
        stacker = StackedClassifier(meta_learner,
                                    base_learners,
                                    n_folds=3,
                                    verbose=0,
                                    Kfold=folds,
                                    stack_by_proba=False,
                                    oob_score_flag=True,
                                    oob_metrics=log_loss)
        stacker.fit(self.iris.data, self.iris.target)
        score = stacker.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_fwls_classfier(self):
        """FWLSClassifier with an all-ones feature function must score above 0.9 on iris."""
        def feature_func(x):
            return np.ones(x.shape)

        meta_learner = LogisticRegression(random_state=1)
        base_learners = [
            RandomForestClassifier(n_estimators=40, criterion='gini',
                                   random_state=1),
            RidgeClassifier(random_state=1),
        ]
        folds = StratifiedKFold(self.iris.target, 3)
        stacker = FWLSClassifier(meta_learner,
                                 base_learners,
                                 feature_func=feature_func,
                                 n_folds=3,
                                 verbose=0,
                                 Kfold=folds,
                                 stack_by_proba=False)
        stacker.fit(self.iris.data, self.iris.target)
        score = stacker.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
项目:stacked_generalization    作者:fukatani    | 项目源码 | 文件源码
def test_classifier(self):
        """JoblibedClassifier must score above 0.9 and repeat its predictions.

        A second wrapper with the same name "rf" but a different forest is
        fitted without an index; its indexed predictions must equal the
        first wrapper's.
        """
        sample_ids = list(range(len(self.iris.data)))

        first = JoblibedClassifier(RandomForestClassifier(), "rf", cache_dir='')
        first.fit(self.iris.data, self.iris.target, sample_ids)
        prediction = first.predict(self.iris.data, sample_ids)
        score = accuracy_score(self.iris.target, prediction)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))

        second = JoblibedClassifier(RandomForestClassifier(n_estimators=20),
                                    "rf", cache_dir='')
        second.fit(self.iris.data, self.iris.target)
        prediction2 = second.predict(self.iris.data, sample_ids)
        self.assertTrue((prediction == prediction2).all())
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def prec_rf(n_trees, X_train, y_train, X_test, y_test):
    """
    Fit a RandomForestClassifier with ``n_trees`` trees and log its accuracy.

    Non-sparse inputs are reshaped to 2-D (first axis kept, the rest
    flattened); sparse inputs are used as they are.  The fraction of test
    predictions equal to ``y_test`` is logged as a percentage.
    Returns ``(clf, y_pred)``.
    """
    from sklearn.ensemble import RandomForestClassifier

    def as_rows(X):
        # One flat feature row per sample; sparse matrices pass through.
        return X if issparse(X) else X.reshape((X.shape[0], -1))

    X_train = as_rows(X_train)
    X_test = as_rows(X_test)
    LOGGER.info('start predict: n_trees={},X_train.shape={},y_train.shape={},X_test.shape={},y_test.shape={}'.format(
        n_trees, X_train.shape, y_train.shape, X_test.shape, y_test.shape))

    clf = RandomForestClassifier(n_estimators=n_trees, max_depth=None,
                                 n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    n_correct = np.sum(y_pred == y_test)
    prec = float(n_correct) / len(y_test)
    LOGGER.info('prec_rf{}={:.6f}%'.format(n_trees, prec*100.0))
    return clf, y_pred
项目:gcForest    作者:kingfengji    | 项目源码 | 文件源码
def get_toy_config():
    """Return a small cascade configuration as a freshly built dict.

    The result has a single "cascade" entry holding the general settings
    and an "estimators" list with four estimator descriptions, each
    requesting 5 folds.
    """
    estimators = [
        {"n_folds": 5, "type": "XGBClassifier", "n_estimators": 10, "max_depth": 5,
         "objective": "multi:softprob", "silent": True, "nthread": -1, "learning_rate": 0.1},
        {"n_folds": 5, "type": "RandomForestClassifier", "n_estimators": 10, "max_depth": None, "n_jobs": -1},
        {"n_folds": 5, "type": "ExtraTreesClassifier", "n_estimators": 10, "max_depth": None, "n_jobs": -1},
        {"n_folds": 5, "type": "LogisticRegression"},
    ]
    cascade = {
        "random_state": 0,
        "max_layers": 100,
        "early_stopping_rounds": 3,
        "n_classes": 10,
        "estimators": estimators,
    }
    return {"cascade": cascade}
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        """Grid-search random-forest settings and print a report.

        Searches ``max_depth`` in 20..59, ``n_estimators`` in 10..39 and
        three ``max_features`` options with 5-fold cross-validation scored
        by weighted precision, fitting on ``self.X_train`` /
        ``self.y_train``.  Prints the best parameters, the score of every
        grid point and a classification report on ``self.X_test`` /
        ``self.y_test``.

        Every ``print`` takes exactly one argument so the output is the
        same whether it runs as a Python 2 statement or a Python 3 call.
        """
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestClassifier(n_estimators=30), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print("Best parameters set found on development set:\n")
        print(clf.best_params_)

        print("Grid scores on development set:\n")
        # NOTE: grid_scores_ only exists in older scikit-learn releases
        # (newer ones expose cv_results_ instead).
        for params, mean_score, scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params))

        print("Detailed classification report:\n")
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print(classification_report(y_true, y_pred))
项目:SentiCR    作者:senticr    | 项目源码 | 文件源码
def get_classifier(self):
        """Create a new, unfitted classifier for the code in ``self.algo``.

        Recognised codes are "GBT", "RF", "ADB", "DT", "NB", "SGD", "SVC"
        and "MLPC".  Any other value yields the integer 0.
        """
        code = self.algo

        if code == "GBT":
            return GradientBoostingClassifier()
        if code == "RF":
            return RandomForestClassifier()
        if code == "ADB":
            return AdaBoostClassifier()
        if code == "DT":
            return DecisionTreeClassifier()
        if code == "NB":
            return BernoulliNB()
        if code == "SGD":
            return SGDClassifier()
        if code == "SVC":
            return LinearSVC()
        if code == "MLPC":
            mlp_settings = dict(
                activation='logistic', batch_size='auto',
                early_stopping=True, hidden_layer_sizes=(100,),
                learning_rate='adaptive', learning_rate_init=0.1,
                max_iter=5000, random_state=1, solver='lbfgs', tol=0.0001,
                validation_fraction=0.1, verbose=False, warm_start=False)
            return MLPClassifier(**mlp_settings)

        # Unknown code.
        return 0
项目:stock_trend_prediction    作者:r12543    | 项目源码 | 文件源码
def performRFClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Random Forest Binary Classification

    Fits a 100-tree forest on the training data using all cores and returns
    its score on the test data.  ``fout`` and ``savemodel`` are only
    referenced by the disabled model-saving code below.
    """
    forest = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    forest.fit(X_train, y_train)

    # Model persistence is currently disabled:
    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)

    return forest.score(X_test, y_test)
项目:stock_trend_prediction    作者:r12543    | 项目源码 | 文件源码
def performRFClass(X_train, y_train, X_test, y_test, fout, savemodel):
    """
    Random Forest Binary Classification

    Fits a 100-tree forest on the training data using all cores, prints its
    score on the test data and returns that score.  ``fout`` and
    ``savemodel`` are only referenced by the disabled model-saving code
    below.
    """

    clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    clf.fit(X_train, y_train)

    # Model persistence is currently disabled:
    # if savemodel == True:
    #   fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    #   with open(fname_out, 'wb') as f:
    #       cPickle.dump(clf, f, -1)

    accuracy = clf.score(X_test, y_test)

    # Single-argument print: same text as the former Python 2 statement
    # `print "RF: ", accuracy` (two spaces after the colon), and valid on
    # Python 3 as well.
    print("RF:  %s" % (accuracy,))

    # Return the score too, so callers can use it programmatically.
    return accuracy
项目:US-TransportationMode    作者:vlomonaco    | 项目源码 | 文件源码
def random_forest(self, sensors_set):
        """Train a random forest on the features of ``sensors_set`` and save the results.

        The feature list comes from ``self.dataset.get_sensors_set_features``.
        A forest with ``const.PAR_RF_ESTIMATOR`` trees is fitted on the train
        split and evaluated with ``accuracy_score`` on the test split. A table
        of feature names, their importances and the overall accuracy, sorted
        by descending importance, is written as CSV under
        ``const.DIR_RESULTS`` (created if missing). Progress is printed.
        """
        feature_names = list(self.dataset.get_sensors_set_features(sensors_set))
        print("RANDOM FOREST.....")
        print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
        print("NUMBER OF FEATURES: ", len(feature_names))

        x_train, y_train, x_test, y_test = self.__get_sets_for_classification(
            self.dataset.get_train, self.dataset.get_test, feature_names)

        forest = RandomForestClassifier(n_estimators=const.PAR_RF_ESTIMATOR)
        forest.fit(x_train, y_train)
        accuracy = accuracy_score(y_test, forest.predict(x_test))

        # The scalar accuracy is repeated on every row, next to each feature.
        importance_table = pd.DataFrame({
            'accuracy': accuracy,
            'featureName': feature_names,
            'importance': forest.feature_importances_,
        }).sort_values(by='importance', ascending=False)
        print("ACCURACY : " + str(accuracy))
        print("END RANDOM FOREST")

        if not os.path.exists(const.DIR_RESULTS):
            os.makedirs(const.DIR_RESULTS)
        output_path = const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_RANDOM_FOREST_RESULTS
        importance_table.to_csv(output_path, index=False)

    # neural network algorithm: training on all of the train set and testing on all of the test set
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def setUpClass(self):
        """
        Prepare the shared fixtures: load the Boston dataset, bin its target
        into class labels and fit a random forest classifier on the result.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier
        import numpy as np

        boston = load_boston()
        raw_target = boston.target

        # Turn the continuous target into class labels: bucket each value by
        # the bin edges of np.histogram's default binning, then shift the
        # indices down by one.
        bin_edges = np.histogram(raw_target)[1]
        labels = np.digitize(raw_target, bin_edges) - 1

        forest = RandomForestClassifier(random_state=1)
        forest.fit(boston.data, labels)

        # Keep the data, the labels and the fitted model for the tests.
        self.scikit_data = boston
        self.target = labels
        self.scikit_model = forest
项目:coremltools    作者:apple    | 项目源码 | 文件源码
def test_random_forest_classifier(self):
        """Compare a converted binary random forest against scikit-learn per dtype.

        For every dtype key in ``self.number_data_type`` the data and target
        are cast to that dtype, the target is binarised against its mean, and
        a RandomForestClassifier(random_state=1) is passed through
        ``self._sklearn_setup``. The resulting spec is checked with
        ``self._check_tree_model`` and the prediction of the model built from
        it for the first sample must equal the scikit-learn prediction.
        A RuntimeError raised while predicting is reported as "not supported"
        for that dtype instead of failing the test.
        """
        for dtype in self.number_data_type.keys():
            scikit_model = RandomForestClassifier(random_state=1)
            data = self.scikit_data['data'].astype(dtype)
            # Cast once and reuse for both sides of the comparison.
            raw_target = self.scikit_data['target'].astype(dtype)
            target = raw_target > raw_target.mean()
            scikit_model, spec = self._sklearn_setup(scikit_model, dtype, data, target)
            test_data = data[0].reshape(1, -1)
            self._check_tree_model(spec, 'multiArrayType', 'int64Type', 2)
            coreml_model = create_model(spec)
            try:
                # Predict once per model; the same values feed both the
                # comparison and the failure message.
                expected = scikit_model.predict(test_data)[0]
                actual = bool(int(coreml_model.predict({'data': test_data})['target']))
                self.assertEqual(expected, actual,
                                 msg="{} != {} for Dtype: {}".format(expected, actual, dtype))
            except RuntimeError:
                print("{} not supported. ".format(dtype))
项目:cgpm    作者:probcomp    | 项目源码 | 文件源码
def __init__(self, outputs, inputs, k=None, hypers=None, params=None,
            distargs=None, rng=None):
        """Initialise the model with one output variable and at least one input.

        Parameters
        ----------
        outputs : sequence of length 1
            The output variable; it must not also appear in ``inputs``.
        inputs : non-empty sequence
            The input variables.
        k : int, optional
            Number of output categories. When None, ``distargs['k']`` is used.
        hypers : optional
            Accepted but not read here.
        params : dict, optional
            May supply 'alpha' (default .1) and 'forest' (a ready-made
            classifier; a RandomForestClassifier seeded with ``self.rng`` is
            created when absent or None).
        distargs : dict
            Required despite the None default. Must contain
            ``['inputs']['stattypes']`` with one entry per input, and 'k'
            when ``k`` is not passed directly.
        rng : optional
            Random state; ``gu.gen_rng()`` is used when None.

        Raises
        ------
        AssertionError
            If the outputs/inputs/stattypes constraints are violated.
        TypeError
            If ``distargs`` is None.
        """
        # Initialised exactly once; the earlier code assigned this twice,
        # creating and discarding an extra generator when rng was None.
        self.rng = gu.gen_rng() if rng is None else rng
        self.outputs = outputs
        self.inputs = inputs
        assert len(self.outputs) == 1
        assert len(self.inputs) >= 1
        assert self.outputs[0] not in self.inputs
        if distargs is None:
            # Subscripting None below would raise the same exception type
            # with a much less helpful message.
            raise TypeError(
                "distargs is required and must provide ['inputs']['stattypes']")
        assert len(distargs['inputs']['stattypes']) == len(self.inputs)
        self.stattypes = distargs['inputs']['stattypes']
        # Number of output categories and input dimension.
        # XXX WHATTA HACK. BayesDB passes in top-level kwargs, not in distargs.
        self.k = k if k is not None else int(distargs['k'])
        self.p = len(distargs['inputs']['stattypes'])
        # Sufficient statistics.
        self.N = 0
        self.data = Data(x=OrderedDict(), Y=OrderedDict())
        self.counts = [0] * self.k
        # Outlier and random forest parameters.
        if params is None:
            params = {}
        self.alpha = params.get('alpha', .1)
        self.regressor = params.get('forest', None)
        if self.regressor is None:
            self.regressor = RandomForestClassifier(random_state=self.rng)
项目:MLAB_Intuit    作者:rykard95    | 项目源码 | 文件源码
def rf_categorize(email):
    """Predict the category of ``email`` with a forest trained on the local DB.

    Every record of every collection returned by ``utils.get_local_db()`` is a
    training sample: the collection name is the label and the record's 'Text'
    field is the document. Documents are TF-IDF vectorised, a random forest
    with ``int(sqrt(n_features)) + 1`` trees is fitted on them, and the label
    predicted for ``email`` is returned.
    """
    # Collect the training corpus as parallel label / document lists.
    db = utils.get_local_db()
    labels = []
    documents = []
    for collection in db.collection_names():
        for record in db.get_collection(collection).find():
            labels.append(collection)
            documents.append(record['Text'])

    # Vectorise the corpus as a dense matrix, then the input e-mail with the
    # same fitted vocabulary.
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(documents).toarray()
    email_vector = vectorizer.transform([email])

    # Forest size is derived from the number of TF-IDF features.
    n_features = len(X[0])
    forest = RandomForestClassifier(n_estimators=int(sqrt(n_features)) + 1)
    forest.fit(X, labels)
    return forest.predict(email_vector)[0]
项目:magic    作者:pan-webis-de    | 项目源码 | 文件源码
def get_classifier(method='logistic_regression'):
    """Return a preconfigured, unfitted classifier for ``method``.

    Supported values are 'logistic_regression', 'random_forest' and
    'gradient_boosting' (an ``xgb.XGBClassifier``). Any other value returns
    None.
    """
    if method == 'logistic_regression':
        settings = dict(C=1e3,
                        tol=0.01,
                        multi_class='ovr',
                        solver='liblinear',
                        n_jobs=-1,
                        random_state=123)
        return LogisticRegression(**settings)
    elif method == 'random_forest':
        settings = dict(n_estimators=250,
                        bootstrap=False,
                        n_jobs=-1,
                        random_state=123)
        return RandomForestClassifier(**settings)
    elif method == 'gradient_boosting':
        settings = dict(max_depth=10,
                        subsample=0.7,
                        n_estimators=500,
                        min_child_weight=0.05,
                        colsample_bytree=0.3,
                        learning_rate=0.1)
        return xgb.XGBClassifier(**settings)
    # Unrecognised method name.
    return None
项目:ML_lessons    作者:supcom-machine-learning    | 项目源码 | 文件源码
def applyRandomForestClassifier(self, train, test):
        """Fit a default random forest on ``train`` and label ``test``.

        Features are all columns except "PassengerId" (and "Survived" for the
        training frame); the target is the "Survived" column. The training
        score is announced via ``self.writeMessage`` and printed.

        ``test`` is modified in place: its "Survived" column is set to the
        predictions, and the same frame is returned.
        """
        # init algorithm
        RFC = RandomForestClassifier()

        # Training target as a 1-D Series: a one-column DataFrame
        # (train[["Survived"]]) makes scikit-learn warn about a
        # column-vector y.
        y_train = train["Survived"]
        x_train = train[train.columns.difference(["PassengerId", "Survived"])]

        # fitting
        RFC.fit(x_train, y_train)

        result = RFC.predict(test[test.columns.difference(["PassengerId"])])

        self.writeMessage("current training score")
        # Parenthesised single argument: same output on Python 2 and 3
        # (the bare print statement is a SyntaxError on Python 3).
        print(RFC.score(x_train, y_train))

        test["Survived"] = result

        return test
项目:stock-price-prediction    作者:chinuy    | 项目源码 | 文件源码
def buildModel(dataset, method, parameters):
    """
    Build final model for predicting real testing data

    All columns of ``dataset`` except the last are used as features and the
    'UpDown' column as the target.

    method: one of 'RNN', 'RF', 'KNN', 'SVM', 'ADA'.
    parameters: only read for 'SVM', where parameters[0] is C and
        parameters[1] is gamma.

    Returns the result of ``performRNNlass`` for 'RNN', otherwise the fitted
    classifier. Raises ValueError for an unrecognised ``method`` (previously
    this surfaced as an UnboundLocalError at the final fit).
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        # The RNN helper does its own training, so return its result directly.
        return performRNNlass(dataset[features], dataset['UpDown'])

    if method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        clf = SVC(C=parameters[0], gamma=parameters[1])
    elif method == 'ADA':
        clf = AdaBoostClassifier()
    else:
        raise ValueError(
            "Unknown method {!r}; expected one of "
            "'RNN', 'RF', 'KNN', 'SVM', 'ADA'".format(method))

    return clf.fit(dataset[features], dataset['UpDown'])
项目:StockRecommendSystem    作者:doncat99    | 项目源码 | 文件源码
def build_model(self, X_train, y_train):
        """Return a previously saved model, or configure a new random forest.

        When ``self.paras.load`` equals True, ``self.load_training_model`` is
        tried first and its result is returned if it is not None. Otherwise
        ``self.best_window`` is called with the tree, feature and window
        bounds from ``self.paras`` and a RandomForestClassifier is built from
        the tree count and max-features it returns. The classifier is
        constructed but not fitted here.
        """
        if self.paras.load == True:
            model = self.load_training_model(self.paras.window_len)
            # Identity test: `!= None` would dispatch to the model's own
            # comparison operator, which need not return a plain bool.
            if model is not None:
                return model

        print('build Random Forrest model...')

        # NOTE(review): `index` is neither a parameter nor a local of this
        # method; it has to resolve as a global at call time -- confirm.
        # Bounds on the number of trees.
        t_min = self.paras.tree_min[index]
        t_max = self.paras.tree_max[index]
        # Bounds on the max number of features.
        f_min = self.paras.feature_min[index]
        f_max = self.paras.feature_max[index]
        # Bounds on the window length.
        w_min = self.paras.window_min
        w_max = self.paras.window_max

        # The first element of the result is not used when building the model.
        _, n_opt, m_opt = self.best_window(X_train, y_train, w_min, w_max, t_min, t_max, f_min, f_max)
        model = RandomForestClassifier(n_estimators=n_opt, max_features=m_opt, n_jobs=8, verbose=self.paras.verbose)
        return model
项目:AutoML5    作者:djajetic    | 项目源码 | 文件源码
def __init__(self, info, verbose=True, debug_mode=False):
        """Select and configure the underlying model from the dataset ``info``.

        ``info`` must provide 'label_num', 'target_num', 'task' and 'metric';
        'is_sparse' is read for regression tasks, and 'has_categorical' /
        'is_sparse' for all other tasks.

        Selection:
          * debug_mode >= 2: RandomPredictor (returns immediately).
          * task == 'regression': BaggingRegressor over Ridge when sparse,
            else GradientBoostingRegressor; ``predict_method`` is ``predict``.
          * otherwise: RandomForestClassifier when the data has categorical
            variables, BaggingClassifier over BernoulliNB when sparse, else
            GradientBoostingClassifier; wrapped in MultiLabelEnsemble for
            'multilabel.classification'; ``predict_method`` is
            ``predict_proba``.

        ``verbose`` is forwarded to the chosen estimator.
        """
        self.label_num = info['label_num']
        self.target_num = info['target_num']
        self.task = info['task']
        self.metric = info['metric']
        # To calibrate proba. The balance=True variant is left disabled.
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)
        if debug_mode >= 2:
            self.name = "RandomPredictor"
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba
            return
        if info['task'] == 'regression':
            if info['is_sparse'] == True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1, max_depth=4, min_samples_split=14, verbose=verbose, warm_start=True)
            # Regressors expose plain predictions only.
            self.predict_method = self.model.predict
        else:
            if info['has_categorical']:  # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
            elif info['is_sparse']:
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)  # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingClassifier"
                # Direct constructor call instead of eval() on a string
                # assembled from the class name and str(verbose).
                self.model = GradientBoostingClassifier(n_estimators=1, verbose=verbose, random_state=1, warm_start=True)
            if info['task'] == 'multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba
项目:hyperband    作者:zygmuntz    | 项目源码 | 文件源码
def try_params( n_iterations, params ):
    """Train and evaluate a forest sized from ``n_iterations``.

    The number of trees is ``n_iterations * trees_per_iteration`` rounded to
    the nearest integer. ``params`` is pretty-printed and forwarded as
    keyword arguments to ``RF``. The classifier is then handed, together with
    the module-level ``data``, to ``train_and_eval_sklearn_classifier``, whose
    result is returned.
    """
    n_estimators = int(round(n_iterations * trees_per_iteration))
    # One pre-formatted argument prints identical text on Python 2 and 3
    # (the bare print statement is a SyntaxError on Python 3).
    print("n_estimators: %s" % (n_estimators,))
    pprint(params)

    clf = RF(n_estimators=n_estimators, verbose=0, n_jobs=-1, **params)

    return train_and_eval_sklearn_classifier(clf, data)
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a wrapped random forest, print its scores and metrics, return the wrapper.

    A RandomForestClassifier is wrapped in ``SKClassifier`` and fitted on the
    training data. The ``zero_one_score`` values for the train and test
    predictions are printed, followed by the wrapper's ``metrics`` on the
    test data. With ``show_mistakes`` set, ``show_mistakes(X_test, Y_test, 10)``
    is invoked on the wrapper first. Returns the ``SKClassifier`` instance.

    NOTE(review): the ``n_estimators`` and ``max_features`` arguments are not
    read by the body; the forest is always built with n_estimators=10,
    max_features=20, max_depth=10 -- confirm whether that is intended.
    """
    wrapped = SKClassifier(
        RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    fitted = wrapped.fit(X_train, Y_train)

    test_pred = fitted.predict(X_test)
    train_score = zero_one_score(Y_train, fitted.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    report = wrapped.metrics(X_test, Y_test)
    if show_mistakes:
        wrapped.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', report)
    return wrapped
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a wrapped random forest, print its scores and metrics, return the wrapper.

    A RandomForestClassifier is wrapped in ``SKClassifier`` and fitted on the
    training data. The ``zero_one_score`` values for the train and test
    predictions are printed, followed by the wrapper's ``metrics`` on the
    test data. With ``show_mistakes`` set, ``show_mistakes(X_test, Y_test, 10)``
    is invoked on the wrapper first. Returns the ``SKClassifier`` instance.

    NOTE(review): the ``n_estimators`` and ``max_features`` arguments are not
    read by the body; the forest is always built with n_estimators=10,
    max_features=20, max_depth=10 -- confirm whether that is intended.
    """
    wrapped = SKClassifier(
        RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    fitted = wrapped.fit(X_train, Y_train)

    test_pred = fitted.predict(X_test)
    train_score = zero_one_score(Y_train, fitted.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    report = wrapped.metrics(X_test, Y_test)
    if show_mistakes:
        wrapped.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', report)
    return wrapped
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a wrapped random forest, print its scores and metrics, return the wrapper.

    A RandomForestClassifier is wrapped in ``SKClassifier`` and fitted on the
    training data. The ``zero_one_score`` values for the train and test
    predictions are printed, followed by the wrapper's ``metrics`` on the
    test data. With ``show_mistakes`` set, ``show_mistakes(X_test, Y_test, 10)``
    is invoked on the wrapper first. Returns the ``SKClassifier`` instance.

    NOTE(review): the ``n_estimators`` and ``max_features`` arguments are not
    read by the body; the forest is always built with n_estimators=10,
    max_features=20, max_depth=10 -- confirm whether that is intended.
    """
    wrapped = SKClassifier(
        RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    fitted = wrapped.fit(X_train, Y_train)

    test_pred = fitted.predict(X_test)
    train_score = zero_one_score(Y_train, fitted.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    report = wrapped.metrics(X_test, Y_test)
    if show_mistakes:
        wrapped.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', report)
    return wrapped
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def run_predict_random_forest(X_train,Y_train,X_test,Y_test, n_estimators=30, max_features=500, show_mistakes=False):
    """Fit a wrapped random forest, print its scores and metrics, return the wrapper.

    A RandomForestClassifier is wrapped in ``SKClassifier`` and fitted on the
    training data. The ``zero_one_score`` values for the train and test
    predictions are printed, followed by the wrapper's ``metrics`` on the
    test data. With ``show_mistakes`` set, ``show_mistakes(X_test, Y_test, 10)``
    is invoked on the wrapper first. Returns the ``SKClassifier`` instance.

    NOTE(review): the ``n_estimators`` and ``max_features`` arguments are not
    read by the body; the forest is always built with n_estimators=10,
    max_features=20, max_depth=10 -- confirm whether that is intended.
    """
    wrapped = SKClassifier(
        RandomForestClassifier(n_estimators=10, max_features=20, max_depth=10))
    fitted = wrapped.fit(X_train, Y_train)

    test_pred = fitted.predict(X_test)
    train_score = zero_one_score(Y_train, fitted.predict(X_train))
    test_score = zero_one_score(Y_test, test_pred)
    print('\n Random forest 0-1 error.  \n Train: ', train_score, '\n Test: ', test_score)

    report = wrapped.metrics(X_test, Y_test)
    if show_mistakes:
        wrapped.show_mistakes(X_test, Y_test, 10)
    print('Metrics:', report)
    return wrapped