Python sklearn.model_selection module: cross_val_score() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use sklearn.model_selection.cross_val_score().
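Before the project excerpts, a minimal self-contained baseline (our addition, on synthetic data) showing the core call that all of the examples below build on:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

# five stratified folds; scores is an array of five accuracy values
X, y = make_classification(n_samples=200, random_state=0)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y, cv=5, scoring='accuracy')
print(scores, scores.mean())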

Project: playground    Author: Pennsy    | project source | file source
def learn_decision_tree(data):
    DT = tree.DecisionTreeClassifier(max_depth=7)
    scorer = make_scorer(matthews_corrcoef)
    for i in range(5):
        scores = cross_val_score(DT, data.X_train, data.y_train, cv=10, scoring=scorer)
        print("iteration",i, "dt mean:", scores.mean())
        scores = list(scores)
        print("Decision Tree train scores:\n", scores)
    return DT
    # DT = DT.fit(train_data[:, :-1], train_data[:, -1])
    # predictionsDT = DT.predict(validation_data[:, :-1])

    # validating predictions
    # dtError = 0
    # for i in range(0, len(validation_data)):
    #         if(validation_data[i][20] != predictionsDT[i]):
    #                 dtError = dtError + 1
    # print("DT Error : ", float(dtError)/len(validation_data)*100.0)
Project: brainiak    Author: brainiak    | project source | file source
def _sfn(l, mask, myrad, bcast_var):
    """Score classifier on searchlight data using cross-validation.

    The classifier is in `bcast_var[2]`. The labels are in `bcast_var[0]`. The
    number of cross-validation folds is in `bcast_var[1]`.
    """
    clf = bcast_var[2]
    data = l[0][mask, :].T
    # print(l[0].shape, mask.shape, data.shape)
    skf = model_selection.StratifiedKFold(n_splits=bcast_var[1],
                                          shuffle=False)
    accuracy = np.mean(model_selection.cross_val_score(clf, data,
                                                       y=bcast_var[0],
                                                       cv=skf,
                                                       n_jobs=1))
    return accuracy
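For orientation, a hypothetical call to _sfn; the shapes and names below are our assumptions, not part of brainiak's API. The broadcast tuple packs (labels, n_folds, classifier), and the mask selects voxels from a (voxels, samples) array:

import numpy as np
from sklearn import model_selection, svm

n_voxels, n_samples = 64, 40
l = [np.random.randn(n_voxels, n_samples)]         # one searchlight data block
mask = np.zeros(n_voxels, dtype=bool)
mask[:10] = True                                   # keep 10 voxels
labels = np.tile([0, 1], n_samples // 2)           # balanced binary labels
bcast_var = (labels, 4, svm.SVC(kernel='linear'))  # (labels, n_folds, clf)
print(_sfn(l, mask, myrad=None, bcast_var=bcast_var))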
Project: brainiak    Author: brainiak    | project source | file source
def example_of_cross_validation_using_model_selection(raw_data, labels, num_subjects, num_epochs_per_subj):
    # NOTE: this method does not work for sklearn.svm.SVC with precomputed kernel
    # when the kernel matrix is computed in portions; also, this method only works
    # for self-correlation, i.e. correlation between the same data matrix.

    # no shrinking, set C=1
    svm_clf = svm.SVC(kernel='precomputed', shrinking=False, C=1)
    #logit_clf = LogisticRegression()
    clf = Classifier(svm_clf, epochs_per_subj=num_epochs_per_subj)
    # doing leave-one-subject-out cross validation
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_subjects,
                                          shuffle=False)
    scores = model_selection.cross_val_score(clf, list(zip(raw_data, raw_data)),
                                             y=labels,
                                             cv=skf)
    print(scores)
    logger.info(
        'the overall cross validation accuracy is %.2f' %
        np.mean(scores)
    )
Project: pyglmnet    Author: glm-tools    | project source | file source
def test_cv():
    """Simple CV check."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()
    cv = KFold(n_splits=5)

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    # check that it returns 5 scores
    scores = cross_val_score(glm_normal, X, y, cv=cv)
    assert_equal(len(scores), 5)

    param_grid = [{'alpha': np.linspace(0.01, 0.99, 2)},
                  {'reg_lambda': np.logspace(np.log(0.5), np.log(0.01),
                                             10, base=np.exp(1))}]
    glmcv = GridSearchCV(glm_normal, param_grid, cv=cv)
    glmcv.fit(X, y)
Project: strategy    Author: kanghua309    | project source | file source
def model_cross_valid(X,Y):
    seed = 7
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    def build_model(model_name):
        model = model_name()
        return model
    scoring = 'neg_mean_squared_error'
    # still to try: random forest, boosting, lstm, gbdt

    for model_name in [LinearRegression,ElasticNet]:
    #for model_name in [LinearRegression,Ridge,Lasso,ElasticNet,KNeighborsRegressor,DecisionTreeRegressor,SVR,RandomForestRegressor,AdaBoostRegressor,GradientBoostingRegressor]:
        model = build_model(model_name)
        results = model_selection.cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
        print(model_name,results.mean())
Project: DSI-personal-reference-kit    Author: teb311    | project source | file source
def fit_regression(X, y, regression_class=LinearRegression, regularization_const=.001):
    '''
        Fit a regression model to a dataset (X, y) using the given regression
        class (from scikit-learn). A regularization constant is required when
        the regression class is Lasso or Ridge.

        X (pandas DataFrame): The data.
        y (pandas DataFrame or Series): The answers.
        regression_class (class): One of sklearn.linear_model.[LinearRegression, Ridge, Lasso]
        regularization_const: the regularization parameter for Ridge or Lasso.
                              Called alpha by scikit-learn for interface reasons.

        Return:
            tuple, (the_fitted_regressor, mean(cross_val_score)).
    '''
    if regression_class is LinearRegression:
        predictor = regression_class()
    else:
        predictor = regression_class(alpha=regularization_const, normalize=True)

    predictor.fit(X, y)

    cross_scores = cross_val_score(predictor, X, y=y, scoring='neg_mean_squared_error')
    cross_scores_corrected = np.sqrt(-1 * cross_scores)  # scikit-learn returns negative MSE; flip the sign and take the root for RMSE

    return (predictor, np.mean(cross_scores_corrected))
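A hypothetical call on synthetic data (Ridge and the alpha value are our choice; note that recent scikit-learn versions removed the normalize flag this helper relies on):

import pandas as pd
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X_arr, y_arr = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
model, rmse = fit_regression(pd.DataFrame(X_arr), pd.Series(y_arr),
                             regression_class=Ridge, regularization_const=0.1)
print(rmse)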
Project: scikit-mdr    Author: EpistasisLab    | project source | file source
def test_mdr_sklearn_pipeline():
    """Ensure that MDR can be used as a transformer in a scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True))
    assert np.mean(cv_scores) > 0.
Project: scikit-mdr    Author: EpistasisLab    | project source | file source
def test_mdr_sklearn_pipeline_parallel():
    """Ensure that MDR can be used as a transformer in a parallelized scikit-learn pipeline"""
    features = np.array([[2,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    1],
                         [0,    0],
                         [0,    0],
                         [0,    0],
                         [1,    1],
                         [1,    1]])

    classes = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
    clf = make_pipeline(MDR(), LogisticRegression())
    cv_scores = cross_val_score(clf, features, classes, cv=StratifiedKFold(n_splits=5, shuffle=True), n_jobs=-1)
    assert np.mean(cv_scores) > 0.
Project: xplore    Author: fahd09    | project source | file source
def eval_models(eda_objs, clfs):
    '''
    Uses a given set of classifier objects to evaluate a given set of pipelines
    and returns their cross-validation scores.

    Parameters
    ----------
    eda_objs : list of (name, object) tuples
               the pipelines to compare
    clfs     : list of (name, classifier) tuples
               the classifiers to evaluate
    '''

    if not isinstance(clfs, list):
        clfs = [clfs]
    acc = []
    for clf_name, clf in clfs:        
        for pipe_name, obj in eda_objs:   
            X, y = obj.df[obj._get_input_features()], obj.df[obj.y]
            cv_score = cross_val_score(estimator=clf, X=X, y=y, cv=5, scoring='r2') #neg_mean_squared_error
            acc.append([(clf_name, pipe_name, v) for v in cv_score])
    acc = [item for sublist in acc for item in sublist] # flatten the list of lists
    return acc
Project: yellowbrick    Author: DistrictDataLabs    | project source | file source
def __init__(self, model, ax=None, alphas=None,
                 cv=None, scoring=None, **kwargs):

        # Check to make sure this is not a "RegressorCV"
        name = model.__class__.__name__
        if name.endswith("CV"):
            raise YellowbrickTypeError((
                "'{}' is a CV regularization model;"
                " try AlphaSelection instead."
            ).format(name))

        # Call super to initialize the class
        super(ManualAlphaSelection, self).__init__(model, ax=ax, **kwargs)

        # Set manual alpha selection parameters
        self.alphas = alphas or np.logspace(-10, -2, 200)
        self.errors = None
        self.score_method = partial(cross_val_score, cv=cv, scoring=scoring)
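The functools.partial call on the last line pre-binds cv and scoring, so the visualizer can later invoke score_method(model, X, y) as if it were cross_val_score with those two arguments fixed. A standalone sketch of the same idea:

from functools import partial
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=100, random_state=0)
score_method = partial(cross_val_score, cv=3, scoring='neg_mean_squared_error')
print(score_method(Ridge(alpha=1.0), X, y))  # cv and scoring are already bound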
Project: nba-prediction    Author: 395299296    | project source | file source
def train_model(team_stats, result_data, test_data):
    # build the training set
    X, y = build_dataSet(team_stats, result_data)

    # train the model
    print("Fitting on %d game samples.." % len(X))

    model = LogisticRegression()
    model.fit(X, y)

    # evaluate with 10-fold cross-validation
    print("Doing cross-validation..")
    print(cross_val_score(model, X, y, cv = 10, scoring='accuracy', n_jobs=-1).mean())

    # use the trained model to predict the test games
    print('Predicting on test data..')

    result = []
    for index, row in test_data.iterrows():
        team1 = row['Vteam']
        team2 = row['Hteam']
        pred = predict_winner(team1, team2, model, team_stats)
        result.append(pred[0][0])

    return result
Project: movie-quality-profitability-predictor    Author: wbowditch    | project source | file source
def compute_cross_fold(data):
     data_table = pd.read_csv("total_set.csv",index_col=0)

     #data_norm = (data - data.mean()) / (data.sum())
     scaler = preprocessing.StandardScaler().fit(data)
     data_scaled = scaler.transform(data)
     #print data_scaled
     profitability_target = data_table['Profitable']
     #print profitability_target
     #gross_target = data_table['Domestic Gross']
     #tomato = data_table['Rotten']


     #normalized_target_gross = (gross_target - gross_target.mean()) / (gross_target.max() - gross_target.min())
     #tomato = (tomato - tomato.mean()) / (tomato.max() - tomato.min())


     #clf_profit = svm.SVC(kernel='rbf',C=0.8, gamma=5,verbose=True)
     clf_profit = svm.LinearSVC(C=0.001,verbose=True,tol=.1)
     clf_profit.fit(data_scaled,profitability_target)
     scores = cross_val_score(clf_profit, data_scaled, profitability_target, cv=10)

     #print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
     return (scores.mean(), scores.std() * 2)
Project: python_utils    Author: Jayhello    | project source | file source
def cross_validation():
    x_train, x_test, y_train, y_test = load_data()
    k_lst = list(range(1, 30))
    lst_scores = []

    for k in k_lst:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, x_train, y_train, cv=10, scoring='accuracy')
        lst_scores.append(scores.mean())

    # changing to misclassification error
    MSE = [1 - x for x in lst_scores]
    optimal_k = k_lst[MSE.index(min(MSE))]
    print "The optimal number of neighbors is %d" % optimal_k
    # plot misclassification error vs k
    # plt.plot(k_lst, MSE)
    # plt.ylabel('Misclassification Error')
    plt.plot(k_lst, lst_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    plt.show()
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_predict_labels():
    # Check if ValueError (when labels is None) propagates to cross_val_score
    # and cross_val_predict
    # And also check if labels is correctly passed to the cv object
    X, y = make_classification(n_samples=20, n_classes=2, random_state=0)

    clf = SVC(kernel="linear")

    label_cvs = [LeaveOneLabelOut(), LeavePLabelOut(2), LabelKFold(),
                 LabelShuffleSplit()]
    for cv in label_cvs:
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             cross_val_score, estimator=clf, X=X, y=y, cv=cv)
        assert_raise_message(ValueError,
                             "The labels parameter should not be None",
                             cross_val_predict, estimator=clf, X=X, y=y, cv=cv)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_pandas():
    # check cross_val_score doesn't destroy pandas dataframe
    types = [(MockDataFrame, MockDataFrame)]
    try:
        from pandas import Series, DataFrame
        types.append((Series, DataFrame))
    except ImportError:
        pass
    for TargetType, InputFeatureType in types:
        # X dataframe, y series
        # 3 fold cross val is used so we need at least 3 samples per class
        X_df, y_ser = InputFeatureType(X), TargetType(y2)
        check_df = lambda x: isinstance(x, InputFeatureType)
        check_series = lambda x: isinstance(x, TargetType)
        clf = CheckingClassifier(check_X=check_df, check_y=check_series)
        cross_val_score(clf, X_df, y_ser)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_precomputed():
    # test for svm with precomputed kernel
    svm = SVC(kernel="precomputed")
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)
    score_precomputed = cross_val_score(svm, linear_kernel, y)
    svm = SVC(kernel="linear")
    score_linear = cross_val_score(svm, X, y)
    assert_array_equal(score_precomputed, score_linear)

    # Error raised for non-square X
    svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cross_val_score, svm,
                  linear_kernel.tolist(), y)
Project: Parallel-SGD    Author: angadgill    | project source | file source
def test_cross_val_score_with_score_func_classification():
    iris = load_iris()
    clf = SVC(kernel='linear')

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    assert_array_almost_equal(scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, [0.97, 1., 0.97, 0.97, 1.], 2)

    # F1 score (classes are balanced, so f1_score should equal the
    # zero/one score)
    f1_scores = cross_val_score(clf, iris.data, iris.target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, [0.97, 1., 0.97, 0.97, 1.], 2)
Project: PortfolioTimeSeriesAnalysis    Author: MizioAnd    | project source | file source
def rmse_cv(model, x_train, y_train):
        rmse = np.sqrt(-cross_val_score(model, x_train, y_train, scoring='neg_mean_squared_error', cv=5))
        return rmse
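This sign flip is a recurring pattern in this listing (the HousePrices, tensorflow_kaggle_house_price, and House-Pricing entries below use it too): scikit-learn scorers follow a greater-is-better convention, so MSE is reported negated, and negating it back before the square root yields an ordinary RMSE. A quick standalone check on synthetic data:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=100, noise=5.0, random_state=0)
neg_mse = cross_val_score(LinearRegression(), X, y,
                          scoring='neg_mean_squared_error', cv=5)
print(np.sqrt(-neg_mse))  # per-fold RMSE, all non-negative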
Project: easyML    Author: aarshayj    | project source | file source
def KFold_CrossValidation(self, scoring_metric):
        # Generate cross validation folds for the training dataset. 

        error = model_selection.cross_val_score(
                estimator=self.alg, 
                X=self.datablock.train[self.predictors].values, 
                y=self.datablock.train[self.datablock.target].values, 
                cv=self.cv_folds, scoring=scoring_metric, n_jobs=-1
                ) 

        return {
            'mean_error': np.mean(error),
            'std_error': np.std(error),
            'all_error': error 
            }
Project: fluentopt    Author: mehdidc    | project source | file source
def feval(d):
    max_depth = d['max_depth']
    n_estimators = d['n_estimators']
    clf = RandomForestClassifier(n_jobs=-1, max_depth=max_depth, n_estimators=n_estimators)
    scores = cross_val_score(clf, data_X, data_y, cv=5, scoring='accuracy')
    return np.mean(scores) - np.std(scores)
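Subtracting the standard deviation penalizes hyperparameter settings whose fold scores are good on average but unstable; a standalone sketch of the same objective, with synthetic data standing in for the project's data_X/data_y:

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

data_X, data_y = make_classification(n_samples=200, random_state=0)
clf = RandomForestClassifier(n_jobs=-1, max_depth=4, n_estimators=50)
scores = cross_val_score(clf, data_X, data_y, cv=5, scoring='accuracy')
print(scores.mean() - scores.std())  # mean accuracy minus its spread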
Project: brainiak    Author: brainiak    | project source | file source
def _cross_validation_for_one_voxel(clf, vid, num_folds, subject_data, labels):
    """Score classifier on data using cross validation."""
    # no shuffling in cv
    skf = model_selection.StratifiedKFold(n_splits=num_folds,
                                          shuffle=False)
    scores = model_selection.cross_val_score(clf, subject_data,
                                             y=labels,
                                             cv=skf, n_jobs=1)
    logger.debug(
        'cross validation for voxel %d is done' %
        vid
    )
    return (vid, scores.mean())
Project: dmon-adp    Author: igabriel85    | project source | file source
def adaBoost(self, settings, data=None, dropna=True):
        df = self.__loadData(data, dropna)
        features = df.columns[:-1]
        X = df[features]
        y = df.iloc[:, -1].values
        seed = 7
        num_trees = 500
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
        print(kfold)
        model = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)
        results = model_selection.cross_val_score(model, X, y, cv=kfold)
        model.fit(X, y)
        print(results.mean())
        print(model.score(X, y))
        return True
Project: Machine-Learning-Algorithms    Author: PacktPublishing    | project source | file source
def compute_accuracies(lr, dt, svc, vc, X, Y):
    accuracies = []

    accuracies.append(cross_val_score(lr, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(dt, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(svc, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(vc, X, Y, scoring='accuracy', cv=10).mean())

    print('Accuracies:')
    print(np.array(accuracies))

    return accuracies
Project: data_utilities    Author: fmv1992    | project source | file source
def multiprocessing_grid_search(queue, shared_list, persistent_object):
    """Explore cross validation grid using multiprocessing."""
    # scores = cross_val_score(*cross_val_score_args, **cross_val_score_kwargs)
    # queue.put(scores)
    while True:
        # All parameters for cross_val_score, plus what is needed to compute
        # the pickle name and persistent_path.
        passed_parameters = queue.get()
        if passed_parameters is None:
            break
        # Unpack the arguments and values.
        grid, cvs_args, cvs_kwargs = passed_parameters
        estimator, x = cvs_args
        estimator.set_params(**grid)
        del cvs_args

        # Check if value was already calculated:
        stored_value = persistent_object.retrieve(estimator, grid)
        if stored_value is None:
            scores = cross_val_score(estimator, x, **cvs_kwargs)
            persistent_object.update(estimator, grid, scores)
        else:
            scores = stored_value
        grid_result = grid.copy()
        grid_result['scores'] = scores
        shared_list.append(grid_result)
Project: data-science-bowl-2017    Author: tondonia    | project source | file source
def score(self, params):
        self.change_to_int(params, self.to_int_params)
        self.level0.set_params(**params)
        score = model_selection.cross_val_score(self.level0, self.trainX, self.trainY, cv=5, n_jobs=-1)
        print('%s ------ Score Mean:%f, Std:%f' % (params, score.mean(), score.std()))
        return {'loss': -score.mean(), 'status': STATUS_OK}  # hyperopt minimizes, so negate the CV accuracy
Project: HousePrices    Author: MizioAnd    | project source | file source
def rmse_cv(model, x_train, y_train):
        rmse = np.sqrt(-cross_val_score(model, x_train, y_train, scoring='neg_mean_squared_error', cv=5))
        return rmse
Project: pyGPGO    Author: hawk31    | project source | file source
def evaluateModel(C, gamma):
    clf = SVC(C=10**C, gamma=10**gamma)
    return np.average(cross_val_score(clf, X, y))
Project: tensorflow_kaggle_house_price    Author: Cuongvn08    | project source | file source
def rmsle_cv(model):
    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(train.values)
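    # NOTE: get_n_splits() just returns the integer n_folds, so cross_val_score
    # receives a plain int below and builds its own unshuffled KFold; the
    # shuffle/random_state settings above therefore have no effect.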
    rmse= np.sqrt(-cross_val_score(model, train.values, y_train, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)
Project: DSI-personal-reference-kit    Author: teb311    | project source | file source
def cross_validate(estimator, training_data, training_targets):
    mse = cross_val_score(estimator, X=training_data, y=training_targets, scoring=root_mean_log_squared_error)
    r2 = cross_val_score(estimator, X=training_data, y=training_targets, scoring='r2')

    return (-1 * np.mean(mse), np.mean(r2))
Project: Machine-Learning-Tools-on-Iris-Dataset    Author: debjitpaul    | project source | file source
def perform_adaboost(self,X_train_std,y_train,X_test_std, y_test): ##perform adaboost

      ada = AdaBoostClassifier(n_estimators=10)
      ada.fit(X_train_std, y_train)
      train_score=cross_val_score(ada,X_train_std, y_train)
      print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))
      test_score=cross_val_score(ada,X_test_std, y_test)
      print('The test accuracy is {:.2f}%'.format(test_score.mean()*100))
      X=X_test_std
      y=y_test
      resolution=0.01
      #Z = svm.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      markers = ('s', 'x', 'o', '^', 'v')
      colors = ('red', 'blue', 'green', 'gray', 'cyan')
      cmap = ListedColormap(colors[:len(np.unique(y_test))])
      X=X_test_std
      y=y_test    
    # plot the decision surface
      x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
      x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
      xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

      Z = ada.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      Z = Z.reshape(xx1.shape)
      plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
      plt.xlim(xx1.min(), xx1.max())
      plt.ylim(xx2.min(), xx2.max())

      for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
      plt.show()
Project: Machine-Learning-Tools-on-Iris-Dataset    Author: debjitpaul    | project source | file source
def perform_random_forest(self,X_train_std,y_train,X_test_std, y_test): ## perform random forest

      rfc = RandomForestClassifier(n_estimators=10, max_depth=None,min_samples_split=2, random_state=0)

      # we create an instance of the random forest classifier and fit the data.
      rfc.fit(X_train_std, y_train)
      train_score=cross_val_score(rfc,X_train_std, y_train)
      print('The training accuracy is {:.2f}%'.format(train_score.mean()*100))
      test_score=cross_val_score(rfc,X_test_std, y_test)
      print('The test accuracy is {:.2f}%'.format(test_score.mean()*100))
      X=X_test_std
      y=y_test
      resolution=0.01
      #Z = svm.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      markers = ('s', 'x', 'o', '^', 'v')
      colors = ('red', 'blue', 'green', 'gray', 'cyan')
      cmap = ListedColormap(colors[:len(np.unique(y_test))])
      X=X_test_std
      y=y_test    
    # plot the decision surface
      x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
      x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
      xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

      Z = rfc.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
      Z = Z.reshape(xx1.shape)
      plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
      plt.xlim(xx1.min(), xx1.max())
      plt.ylim(xx2.min(), xx2.max())

      for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.5, c=cmap(idx),
                    marker=markers[idx], label=cl)
      plt.show()
Project: LSAT    Author: BillVanderLugt    | project source | file source
def CV_eval(model, X, y):
    '''
    Perform 8-fold cross-validation

    Input: model, X data, Y data
    Return: mean of cross-val accuracy scores
    '''
    scores = cross_val_score(model, X, y, cv=8)
    pprint(scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    return scores.mean()
Project: playground    Author: Pennsy    | project source | file source
def knn(data, predict=False, best_n=None):
    if best_n:
        # prediction
        clf = KNeighborsClassifier(n_neighbors=best_n)
        return clf
    knn_scores = []
    for n_neighbors in range(4, 51):
        clf = KNeighborsClassifier(n_neighbors=n_neighbors)
        scores = cross_val_score(clf, data.X_train, data.y_train, cv=5)
        knn_scores.append((n_neighbors, scores.mean()))
    knn_scores = sorted(knn_scores, key=lambda x: x[1], reverse=True)
    print(knn_scores)
Project: playground    Author: Pennsy    | project source | file source
def svm_clf(data):
    clf = svm.LinearSVC(C=1)
    for i in range(5):
        scores = cross_val_score(clf, data.X_train, data.y_train, cv=10)
        print("iteration",i, "svm mean:", scores.mean())
        scores = list(scores)
        print("svm train scores:\n", scores)
    return clf


# use knn to impute missing values
Project: playground    Author: Pennsy    | project source | file source
def knn(data, predict=False):
    n_neighbors = 3
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    for i in range(5):
        scores = cross_val_score(clf, data.X_train, data.y_train, cv=10)
        print("svm mean:", scores.mean())
        scores = list(scores)
        print("svm train scores:\n", scores)

    # prediction
    best_n = n_neighbors
    clf = KNeighborsClassifier(n_neighbors=best_n)
    return clf
Project: forward    Author: yajun0601    | project source | file source
def regression(filename):
    from sklearn.model_selection import train_test_split
    print(filename)
    X,y = loadDataSet(filename)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

#    print(linreg.intercept_, linreg.coef_)
    # pair the feature names with the coefficients
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']
    #print(feature_cols, linreg.coef_)
    #zip(feature_cols, linreg.coef_)
    y_pred = linreg.predict(X_test)

    print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:",metrics.mean_squared_error(y_test, y_pred))
    print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    scores = cross_val_score(linreg, X, y,cv=5)
#    print(filename)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    res = pd.DataFrame(linreg.coef_,columns=feature_cols,index=[filename])
    return (res)

#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
Project: forward    Author: yajun0601    | project source | file source
def regression(filename):
    from sklearn.linear_model import LinearRegression
    from sklearn import metrics

    X,y = loadDataSet(filename)
    print(filename,X.shape)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, test_size=0.25)
    linreg = LinearRegression()
    linreg.fit(X_train, y_train)

#    print(linreg.intercept_, linreg.coef_)
    # pair the feature names with the coefficients
    feature_cols = ['????', '????', '??????','?????','??????','???????','???????','?????????','??????']
#    feature_cols = ['????', '??????','?????','??????','???????','???????','?????????','??????']

    #print(feature_cols, linreg.coef_)
    #zip(feature_cols, linreg.coef_)
    y_pred = linreg.predict(X_test)

    print("MAE:",metrics.mean_absolute_error(y_test, y_pred))
    print("MSE:",metrics.mean_squared_error(y_test, y_pred))
    print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    scores = cross_val_score(linreg, X, y,cv=3)
    print('scores:',scores)  
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    res = pd.DataFrame(linreg.coef_.T[:len(feature_cols)].T,columns=feature_cols,index=[filename.split('.')[0]])
#    res = pd.DataFrame(linreg.coef_,index=[filename.split('.')[0]])
    return (res)

#files = ['201603.xlsx','201604.xlsx','201605.xlsx','?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx']
#files = ['?????3?.xlsx','?????4?.xlsx','?????5?.xlsx','?????6?.xlsx','201703_06.xlsx']
#files = ['201703_06.xlsx']
Project: Spam-Message-Classifier-sklearn    Author: ZPdesu    | project source | file source
def cross_validation(self):
        cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
        scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Project: Spam-Message-Classifier-sklearn    Author: ZPdesu    | project source | file source
def cross_validation(self):
        cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
        scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Project: Spam-Message-Classifier-sklearn    Author: ZPdesu    | project source | file source
def cross_validation(self):
        cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=20)
        scores = cross_val_score(self.clf, self.training_data, self.training_target, cv=cv, scoring='f1_macro')
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
Project: moabb    Author: NeuroTechX    | project source | file source
def score(self, clf, X, y, groups, n_jobs=1):
        """get the score"""
        if len(np.unique(groups)) > 1:
            # if groups takes more than one value, use group-based CV
            cv = LeaveOneGroupOut()
        else:
            # otherwise fall back to k-fold
            cv = KFold(5, shuffle=True, random_state=45)

        acc = cross_val_score(clf, X, y, groups=groups, cv=cv,
                              scoring='accuracy', n_jobs=n_jobs)
        return acc.mean()
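The same branching logic as a self-contained sketch (the synthetic data and the three fake recording sessions are our assumptions):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import KFold, LeaveOneGroupOut, cross_val_score
from sklearn.svm import SVC

X, y = make_classification(n_samples=60, random_state=0)
groups = np.repeat([0, 1, 2], 20)  # e.g. three recording sessions
cv = LeaveOneGroupOut() if len(np.unique(groups)) > 1 else KFold(5, shuffle=True, random_state=45)
print(cross_val_score(SVC(), X, y, groups=groups, cv=cv, scoring='accuracy').mean())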
Project: moabb    Author: NeuroTechX    | project source | file source
def score(self, clf, X, y, groups, n_jobs=1):
        """get the score"""
        if len(np.unique(groups)) > 1:
            # if groups takes more than one value, use group-based CV
            cv = LeaveOneGroupOut()
        else:
            # otherwise fall back to k-fold
            cv = KFold(5, shuffle=True, random_state=45)

        auc = cross_val_score(clf, X, y, groups=groups, cv=cv,
                              scoring='roc_auc', n_jobs=n_jobs)
        return auc.mean()
Project: House-Pricing    Author: playing-kaggle    | project source | file source
def rmse_cv(model, X , y):
    rmse= np.sqrt(-cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv = 5))
    return(rmse)


#%%
Project: House-Pricing    Author: playing-kaggle    | project source | file source
def rmse_cv(model, X, Y):
    rmse = np.sqrt(-cross_val_score(model, X, Y, scoring=scorer, cv=10))
    return (rmse)
Project: eezzy    Author: 3Blades    | project source | file source
def check_model(model, splits, X, y):
    model_scores = cross_val_score(model, X, y, cv=splits,
                                   scoring='neg_mean_absolute_error')
    return sum(model_scores) / len(model_scores)
Project: StockRecommendSystem    Author: doncat99    | project source | file source
def perform_CV(self, X_train, y_train, number_folds, n, m):
        model = RandomForestClassifier(n_estimators=n, max_features=m, n_jobs=8, verbose=self.paras.verbose)
        acc = np.mean(cross_val_score(model, X_train, y_train, cv=number_folds))
        #print 'Size of Forrest : number of trees : ' + str(n) + ', maximum of features : ' + str(m) + '. Accuracy : ' + str(acc)
        return acc

    # MODEL SELECTION : Find best parameters ######################################
    ## Inputs :  X_train, y_train, number of folds, range of number of trees, range of max of features
    ## Outputs : optimal number of trees, optimal max of features, accuracy
Project: personality    Author: nlp-psych    | project source | file source
def predict_trait(X, Y):
    scores = cross_val_score(svm.SVC(), X, Y, scoring='accuracy', cv=10)
    return scores.mean()
Project: machine-learning    Author: guilhermesilveira    | project source | file source
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    k = 10
    scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv = k)
    taxa_de_acerto = np.mean(scores)
    msg = "Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto)
    print(msg)
    return taxa_de_acerto
Project: machine-learning    Author: guilhermesilveira    | project source | file source
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    k = 10
    scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv = k)
    taxa_de_acerto = np.mean(scores)
    msg = "Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto)
    print(msg)
    return taxa_de_acerto
Project: machine-learning    Author: guilhermesilveira    | project source | file source
def fit_and_predict(nome, modelo, treino_dados, treino_marcacoes):
    k = 10
    scores = cross_val_score(modelo, treino_dados, treino_marcacoes, cv = k)
    taxa_de_acerto = np.mean(scores)
    msg = "Taxa de acerto do {0}: {1}".format(nome, taxa_de_acerto)
    print(msg)
    return taxa_de_acerto