Python sklearn.metrics 模块,median_absolute_error() 实例源码

我们从Python开源项目中,提取了以下47个代码示例,用于说明如何使用sklearn.metrics.median_absolute_error()

项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_It( xM, yV, n_splits = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Run cv_LinearRegression N_it times and pool the per-fold scores of all runs.

    xM, yV, n_splits, scoring and disp are forwarded unchanged to
    cv_LinearRegression on every run. The pooled summary line is printed
    when either disp or ldisp is set.

    Returns a dict with 'mean' and 'std' (np.mean / np.std of the pooled
    scores) and 'list' (the pooled scores themselves).
    """
    pooled = []
    for _ in range(N_it):
        # Every run contributes its own list of fold scores.
        pooled += cv_LinearRegression(xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)

    summary = {
        'mean': np.mean(pooled),
        'std': np.std(pooled),
        'list': pooled,
    }

    show_summary = disp or ldisp
    if show_summary:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, summary['mean'], summary['std']))

    return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_It( xM, yV, n_splits = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Run cv_LinearRegression_ci N_it times, pooling fold scores and fitted parameters.

    xM, yV, n_splits, scoring and disp are forwarded unchanged to
    cv_LinearRegression_ci on every run. The pooled summary line is printed
    when either disp or ldisp is set.

    Returns a dict with 'mean' and 'std' (np.mean / np.std of the pooled
    scores), 'list' (the pooled scores) and 'ci' (the pooled second return
    value of cv_LinearRegression_ci).
    """
    scores, coef_intercepts = [], []
    for _ in range(N_it):
        run_scores, run_ci = cv_LinearRegression_ci(
            xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)
        scores += run_scores
        coef_intercepts += run_ci

    summary = {
        'mean': np.mean(scores),
        'std': np.std(scores),
        'list': scores,
        'ci': coef_intercepts,
    }

    show_summary = disp or ldisp
    if show_summary:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, summary['mean'], summary['std']))

    return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_It( xM, yV, n_splits = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Run cv_LinearRegression_ci_pred N_it times and collect everything it returns.

    xM, yV, n_splits, scoring and disp are forwarded unchanged to
    cv_LinearRegression_ci_pred on every run. The pooled summary line is
    printed when either disp or ldisp is set.

    Returns a dict with 'mean' and 'std' (np.mean / np.std of the pooled
    scores), 'list' (the pooled scores), 'ci' (the pooled second return
    value) and 'yVp' (one entry per run: the third return value, which is a
    list rather than a matrix).
    """
    scores, coef_intercepts, predictions_per_run = [], [], []
    for _ in range(N_it):
        run_scores, run_ci, run_pred = cv_LinearRegression_ci_pred(
            xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)
        scores += run_scores
        coef_intercepts += run_ci
        # Predictions are kept per run (appended), not flattened.
        predictions_per_run.append(run_pred)

    summary = {
        'mean': np.mean(scores),
        'std': np.std(scores),
        'list': scores,
        'ci': coef_intercepts,
        'yVp': predictions_per_run,
    }

    show_summary = disp or ldisp
    if show_summary:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, summary['mean'], summary['std']))

    return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_It( xM, yV, n_splits = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Repeat cv_LinearRegression_ci N_it times and merge the results of all runs.

    Every run receives xM, yV, n_splits, scoring and disp as given. A summary
    line with the pooled mean and std is printed if disp or ldisp is true.

    Returns a dict: 'mean' / 'std' are np.mean / np.std over the pooled fold
    scores, 'list' is the pooled score list and 'ci' is the pooled list of
    the second value returned by cv_LinearRegression_ci.
    """
    all_scores = []
    all_ci = []
    for _ in range(N_it):
        one_run = cv_LinearRegression_ci(xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)
        fold_scores, fold_ci = one_run
        all_scores += fold_scores
        all_ci += fold_ci

    result = {'mean': np.mean(all_scores),
              'std': np.std(all_scores),
              'list': all_scores,
              'ci': all_ci}

    if ldisp or disp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, result['mean'], result['std']))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_It( xM, yV, n_splits = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Repeat cv_LinearRegression_ci_pred N_it times and merge the results of all runs.

    Every run receives xM, yV, n_splits, scoring and disp as given. A summary
    line with the pooled mean and std is printed if disp or ldisp is true.

    Returns a dict: 'mean' / 'std' are np.mean / np.std over the pooled fold
    scores, 'list' is the pooled score list, 'ci' the pooled second return
    value, and 'yVp' holds one prediction list per run.
    """
    all_scores = []
    all_ci = []
    pred_lists = []  # one list-typed prediction vector per run
    for _ in range(N_it):
        one_run = cv_LinearRegression_ci_pred(xM, yV, n_splits=n_splits, scoring=scoring, disp=disp)
        fold_scores, fold_ci, pred_list = one_run
        all_scores += fold_scores
        all_ci += fold_ci
        pred_lists.append(pred_list)

    result = {'mean': np.mean(all_scores),
              'std': np.std(all_scores),
              'list': all_scores,
              'ci': all_ci,
              'yVp': pred_lists}

    if ldisp or disp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, result['mean'], result['std']))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_It(xM, yV, n_folds=5, scoring='median_absolute_error', N_it=10, disp=False, ldisp=False):
    """
    Run cv_LinearRegression N_it times and pool the per-fold scores of all runs.

    xM, yV, n_folds, scoring and disp are forwarded unchanged to
    cv_LinearRegression on every run. The pooled summary line is printed
    when either disp or ldisp is set.

    Returns a dict with 'mean' and 'std' (np.mean / np.std of the pooled
    scores) and 'list' (the pooled scores themselves).
    """
    pooled = []
    for _ in range(N_it):
        run_scores = cv_LinearRegression(xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        pooled += run_scores

    summary = {'mean': np.mean(pooled), 'std': np.std(pooled), 'list': pooled}

    show_summary = disp or ldisp
    if show_summary:
        line = '{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, summary['mean'], summary['std'])
        print(line)

    return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_It(xM, yV, n_folds=5, scoring='median_absolute_error', N_it=10, disp=False, ldisp=False):
    """
    Run cv_LinearRegression_ci N_it times, pooling fold scores and fitted parameters.

    xM, yV, n_folds, scoring and disp are forwarded unchanged to
    cv_LinearRegression_ci on every run. The pooled summary line is printed
    when either disp or ldisp is set.

    Returns a dict with 'mean' and 'std' (np.mean / np.std of the pooled
    scores), 'list' (the pooled scores) and 'ci' (the pooled second return
    value of cv_LinearRegression_ci).
    """
    scores, coef_intercepts = [], []
    for _ in range(N_it):
        run_scores, run_ci = cv_LinearRegression_ci(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        scores += run_scores
        coef_intercepts += run_ci

    summary = {'mean': np.mean(scores),
               'std': np.std(scores),
               'list': scores,
               'ci': coef_intercepts}

    show_summary = disp or ldisp
    if show_summary:
        line = '{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, summary['mean'], summary['std'])
        print(line)

    return summary
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def gs_Ridge(xM, yV, alphas_log=(1, -1, 9), n_folds=5, n_jobs=-1, scoring='r2'):
    """
    Grid-search the Ridge alpha over a log-spaced grid with shuffled K-fold splits.

    Parameters
    -------------
    xM, yV: data handed to GridSearchCV.fit; their shapes are printed first
    alphas_log: arguments unpacked into np.logspace to build the alpha candidates
    n_folds: number of KFold splits (shuffle=True)
    n_jobs: forwarded to GridSearchCV
    scoring: forwarded to GridSearchCV; the original note lists
        mean_absolute_error, mean_squared_error, median_absolute_error, r2

    Returns
    -------------
    The fitted GridSearchCV object.
    """
    print('If scoring is not r2 but error metric, output score is revered for scoring!')
    print(xM.shape, yV.shape)

    ridge = linear_model.Ridge()
    alpha_grid = {'alpha': np.logspace(*alphas_log)}
    # The split generator itself (not the KFold object) is used as the cv argument.
    fold_iter = model_selection.KFold(n_splits=n_folds, shuffle=True).split(xM)

    search = model_selection.GridSearchCV(
        ridge, alpha_grid, scoring=scoring, cv=fold_iter, n_jobs=n_jobs)
    search.fit(xM, yV)
    return search
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Repeat cv_LinearRegression N_it times and merge the fold scores of all runs.

    Every run receives xM, yV, n_folds, scoring and disp as given. A summary
    line with the pooled mean and std is printed if disp or ldisp is true.

    Returns a dict: 'mean' / 'std' are np.mean / np.std over the pooled fold
    scores and 'list' is the pooled score list.
    """
    all_scores = []
    for _ in range(N_it):
        all_scores += cv_LinearRegression(xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)

    result = {
        'mean': np.mean(all_scores),
        'std': np.std(all_scores),
        'list': all_scores,
    }

    if ldisp or disp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, result['mean'], result['std']))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Repeat cv_LinearRegression_ci N_it times and merge the results of all runs.

    Every run receives xM, yV, n_folds, scoring and disp as given. A summary
    line with the pooled mean and std is printed if disp or ldisp is true.

    Returns a dict: 'mean' / 'std' are np.mean / np.std over the pooled fold
    scores, 'list' is the pooled score list and 'ci' is the pooled list of
    the second value returned by cv_LinearRegression_ci.
    """
    all_scores = []
    all_ci = []
    for _ in range(N_it):
        fold_scores, fold_ci = cv_LinearRegression_ci(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        all_scores += fold_scores
        all_ci += fold_ci

    result = {
        'mean': np.mean(all_scores),
        'std': np.std(all_scores),
        'list': all_scores,
        'ci': all_ci,
    }

    if ldisp or disp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, result['mean'], result['std']))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Repeat cv_LinearRegression_ci_pred N_it times and merge the results of all runs.

    Every run receives xM, yV, n_folds, scoring and disp as given. A summary
    line with the pooled mean and std is printed if disp or ldisp is true.

    Returns a dict: 'mean' / 'std' are np.mean / np.std over the pooled fold
    scores, 'list' is the pooled score list, 'ci' the pooled second return
    value, and 'yVp' holds one prediction list per run.
    """
    all_scores = []
    all_ci = []
    pred_lists = []  # one list-typed prediction vector per run
    for _ in range(N_it):
        fold_scores, fold_ci, pred_list = cv_LinearRegression_ci_pred(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        all_scores += fold_scores
        all_ci += fold_ci
        pred_lists.append(pred_list)

    result = {
        'mean': np.mean(all_scores),
        'std': np.std(all_scores),
        'list': all_scores,
        'ci': all_ci,
        'yVp': pred_lists,
    }

    if ldisp or disp:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, result['mean'], result['std']))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Average out cross-validation noise by calling cv_LinearRegression N_it times.

    The fold scores of every call are concatenated. disp is handed on to each
    call; the final mean/std line is printed when disp or ldisp is set.

    Returns a dict with keys 'mean', 'std' (np.mean / np.std of the
    concatenated scores) and 'list' (the concatenated scores).
    """
    merged = []
    for _ in range(N_it):
        merged += cv_LinearRegression(xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)

    stats = {'mean': np.mean(merged),
             'std': np.std(merged),
             'list': merged}

    wants_output = disp or ldisp
    if wants_output:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, stats['mean'], stats['std']))

    return stats
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Average out cross-validation noise by calling cv_LinearRegression_ci N_it times.

    Fold scores and the second return value of every call are concatenated.
    disp is handed on to each call; the final mean/std line is printed when
    disp or ldisp is set.

    Returns a dict with keys 'mean', 'std' (np.mean / np.std of the
    concatenated scores), 'list' (the concatenated scores) and 'ci'.
    """
    merged_scores, merged_ci = [], []
    for _ in range(N_it):
        scores_i, ci_i = cv_LinearRegression_ci(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        merged_scores += scores_i
        merged_ci += ci_i

    stats = {'mean': np.mean(merged_scores),
             'std': np.std(merged_scores),
             'list': merged_scores,
             'ci': merged_ci}

    wants_output = disp or ldisp
    if wants_output:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, stats['mean'], stats['std']))

    return stats
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_It( xM, yV, n_folds = 5, scoring = 'median_absolute_error', N_it = 10, disp = False, ldisp = False):
    """
    Average out cross-validation noise by calling cv_LinearRegression_ci_pred N_it times.

    Fold scores and the second return value of every call are concatenated;
    the third return value (a list, not a matrix) is stored once per call.
    disp is handed on to each call; the final mean/std line is printed when
    disp or ldisp is set.

    Returns a dict with keys 'mean', 'std' (np.mean / np.std of the
    concatenated scores), 'list', 'ci' and 'yVp'.
    """
    merged_scores, merged_ci, preds_by_run = [], [], []
    for _ in range(N_it):
        scores_i, ci_i, pred_i = cv_LinearRegression_ci_pred(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        merged_scores += scores_i
        merged_ci += ci_i
        preds_by_run.append(pred_i)

    stats = {'mean': np.mean(merged_scores),
             'std': np.std(merged_scores),
             'list': merged_scores,
             'ci': merged_ci,
             'yVp': preds_by_run}

    wants_output = disp or ldisp
    if wants_output:
        print('{0}: mean(+/-std) --> {1}(+/-{2})'.format(scoring, stats['mean'], stats['std']))

    return stats
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def gs_Ridge( xM, yV, alphas_log = (1, -1, 9), n_folds = 5, n_jobs = -1, scoring = 'r2'):
    """
    Grid-search the Ridge alpha over a log-spaced grid with shuffled K-fold splits.

    Parameters
    -------------
    xM, yV: data handed to GridSearchCV.fit; their shapes are printed first
    alphas_log: arguments unpacked into np.logspace to build the alpha candidates
    n_folds: number of folds; KFold is built over xM.shape[0] items
    n_jobs: forwarded to GridSearchCV
    scoring: forwarded to GridSearchCV; the original note lists
        mean_absolute_error, mean_squared_error, median_absolute_error, r2

    Returns
    -------------
    The fitted GridSearchCV object.

    NOTE(review): this relies on the cross_validation and grid_search modules;
    presumably these are the legacy scikit-learn modules - confirm the
    installed version still provides them.
    """
    print(xM.shape, yV.shape)

    ridge = linear_model.Ridge()
    alpha_grid = {'alpha': np.logspace(*alphas_log)}
    n_samples = xM.shape[0]
    folds = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    search = grid_search.GridSearchCV(
        ridge, alpha_grid, scoring=scoring, cv=folds, n_jobs=n_jobs)
    search.fit(xM, yV)
    return search
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def estimate_accuracy(yEv, yEv_calc, disp = False):
    """
    Compute four regression metrics between yEv and yEv_calc.

    Per the original note, this function used to live in jchem and was moved
    here because it fits jutil better.

    Returns the tuple (r_sqr, RMSE, MAE, DAE): metrics.r2_score, the square
    root of metrics.mean_squared_error, metrics.mean_absolute_error and
    metrics.median_absolute_error, each called as (yEv, yEv_calc).
    When disp is true the four values are also printed.
    """
    pair = (yEv, yEv_calc)
    scores = (
        metrics.r2_score(*pair),
        np.sqrt(metrics.mean_squared_error(*pair)),
        metrics.mean_absolute_error(*pair),
        metrics.median_absolute_error(*pair),
    )

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(*scores))

    return scores
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate linear_model.LinearRegression and return one score per fold.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx] (treated as a vector, not a matrix)
    n_splits : number of KFold splits (shuffle=True)
    scoring : only 'median_absolute_error' is supported
        (scored with metrics.median_absolute_error(y_true, y_pred))
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    list with the median absolute error of every fold.

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError( "{} scoring is not supported.".format( scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=True)
    cv_score_l = list()
    for train, test in kf5_c.split( xM):
        clf.fit( xM[train,:], yV[train])
        yVp_test = clf.predict( xM[test,:])
        cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))

    if disp: # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate linear_model.LinearRegression, keeping scores, parameters and predictions.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx]; must offer .copy() and the
        copy must offer .A1 (presumably a numpy matrix - confirm with callers)
    n_splits : number of KFold splits (shuffle=True)
    scoring : only 'median_absolute_error' is supported
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    (cv_score_l, ci_l, yVp_list)
        cv_score_l : median absolute error of every fold
        ci_l : (clf.coef_, clf.intercept_) of every fold
        yVp_list : yVp.A1.tolist(), where yVp is a copy of yV whose test
            entries were overwritten with the held-out predictions

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError( "{} scoring is not supported.".format( scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=True)
    cv_score_l = list()
    ci_l = list()
    # NOTE(review): yVp keeps the dtype of yV, so predictions are cast to that
    # dtype on assignment - confirm yV is floating point.
    yVp = yV.copy()
    for train, test in kf5_c.split( xM):
        clf.fit( xM[train,:], yV[train])

        yVp_test = clf.predict( xM[test,:])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append( (clf.coef_, clf.intercept_))
        cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))

    if disp: # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full_Ridge( xM, yV, alpha, n_splits = 5, shuffle=True, disp = False):
    """
    Cross-validate linear_model.Ridge(alpha) and return the absolute error of every held-out sample.

    Unlike the scoring-based variants there is no scoring argument: the
    "score" list holds |y - y_pred| for each test sample instead of one
    value per fold.

    Returns (abs_errors, ci_l, yVp_list):
        abs_errors : per-sample absolute errors, concatenated in fold order
        ci_l : (coef_, intercept_) of the model after each fold's fit
        yVp_list : .A1.tolist() of a copy of yV whose test entries were
            overwritten with the held-out predictions
    yV[test] and the predictions are read through [:, 0], i.e. they are
    expected to be two-dimensional (presumably numpy matrices - confirm).
    """
    if disp:
        print(xM.shape, yV.shape)

    ridge = linear_model.Ridge(alpha)
    splitter = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)

    abs_errors = []
    coef_intercepts = []
    predictions = yV.copy()
    for train_idx, test_idx in splitter.split(xM):
        ridge.fit(xM[train_idx, :], yV[train_idx])

        fold_pred = ridge.predict(xM[test_idx, :])
        predictions[test_idx] = fold_pred

        coef_intercepts.append((ridge.coef_, ridge.intercept_))

        # Compare the first column of truth and prediction sample by sample.
        truth_col = np.array(yV[test_idx])[:, 0]
        pred_col = np.array(fold_pred)[:, 0]
        abs_errors += np.abs(truth_col - pred_col).tolist()

    return abs_errors, coef_intercepts, predictions.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def estimate_accuracy4(yEv, yEv_calc, disp = False):
    """
    Compute four regression metrics between yEv and yEv_calc.

    Per the original note, this function used to live in jchem and was moved
    here because it fits jutil better.

    Returns the tuple (r_sqr, RMSE, MAE, DAE): metrics.r2_score, the square
    root of metrics.mean_squared_error, metrics.mean_absolute_error and
    metrics.median_absolute_error, each called as (yEv, yEv_calc).
    When disp is true the four values are also printed.
    """
    pair = (yEv, yEv_calc)
    scores = (
        metrics.r2_score(*pair),
        np.sqrt(metrics.mean_squared_error(*pair)),
        metrics.mean_absolute_error(*pair),
        metrics.median_absolute_error(*pair),
    )

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(*scores))

    return scores
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def eval_score( model, X_test, y_test, string = "Test", graph = False):
    """
    Print R2 and median absolute error of model.predict(X_test) against y_test.

    string labels the report header. When graph is true, the true and
    predicted values are additionally passed to kutil.regress_show4.
    Nothing is returned.
    """
    print()
    print("Evaluation of", string)
    print('--------')

    predicted = model.predict(X_test)
    r2 = metrics.r2_score(y_test, predicted)
    med_ae = metrics.median_absolute_error(y_test, predicted)

    print('Accuracy')
    print('R2: {0:f}, MedAE: {1:f}'.format(r2, med_ae))
    print()

    if not graph:
        return
    kutil.regress_show4(y_test, predicted)
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate linear_model.LinearRegression and return one score per fold.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx] (treated as a vector, not a matrix)
    n_splits : number of KFold splits (shuffle=True)
    scoring : only 'median_absolute_error' is supported
        (scored with metrics.median_absolute_error(y_true, y_pred))
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    list with the median absolute error of every fold.

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError( "{} scoring is not supported.".format( scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=True)
    cv_score_l = list()
    for train, test in kf5_c.split( xM):
        clf.fit( xM[train,:], yV[train])
        yVp_test = clf.predict( xM[test,:])
        cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))

    if disp: # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate linear_model.LinearRegression, keeping fold scores and fitted parameters.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx] (treated as a vector, not a matrix)
    n_splits : number of KFold splits (shuffle=True)
    scoring : only 'median_absolute_error' is supported
        (scored with metrics.median_absolute_error(y_true, y_pred))
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    (cv_score_l, ci_l)
        cv_score_l : median absolute error of every fold
        ci_l : (clf.coef_, clf.intercept_) of every fold

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError( "{} scoring is not supported.".format( scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=True)
    cv_score_l = list()
    ci_l = list()
    for train, test in kf5_c.split( xM):
        clf.fit( xM[train,:], yV[train])

        yVp_test = clf.predict( xM[test,:])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append( (clf.coef_, clf.intercept_))
        cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))

    if disp: # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l, ci_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate linear_model.LinearRegression, keeping scores, parameters and predictions.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx]; must offer .copy() and the
        copy must offer .A1 (presumably a numpy matrix - confirm with callers)
    n_splits : number of KFold splits (shuffle=True)
    scoring : only 'median_absolute_error' is supported
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    (cv_score_l, ci_l, yVp_list)
        cv_score_l : median absolute error of every fold
        ci_l : (clf.coef_, clf.intercept_) of every fold
        yVp_list : yVp.A1.tolist(), where yVp is a copy of yV whose test
            entries were overwritten with the held-out predictions

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError( "{} scoring is not supported.".format( scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold( n_splits=n_splits, shuffle=True)
    cv_score_l = list()
    ci_l = list()
    # NOTE(review): yVp keeps the dtype of yV, so predictions are cast to that
    # dtype on assignment - confirm yV is floating point.
    yVp = yV.copy()
    for train, test in kf5_c.split( xM):
        clf.fit( xM[train,:], yV[train])

        yVp_test = clf.predict( xM[test,:])
        yVp[test] = yVp_test

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append( (clf.coef_, clf.intercept_))
        cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))

    if disp: # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full_Ridge( xM, yV, alpha, n_splits = 5, shuffle=True, disp = False):
    """
    K-fold cross-validation of linear_model.Ridge(alpha) reporting per-sample absolute errors.

    No scoring argument is taken: instead of one score per fold, the first
    return value collects |y - y_pred| for every held-out sample.

    Returns (sample_errors, ci_l, yVp_list):
        sample_errors : absolute errors of all test samples, fold after fold
        ci_l : (coef_, intercept_) recorded after each fold's fit
        yVp_list : .A1.tolist() of a copy of yV in which each test entry was
            replaced by its held-out prediction
    Truth and prediction blocks are sliced with [:, 0], so both are expected
    to be two-dimensional (presumably numpy matrices - confirm).
    """
    if disp:
        print(xM.shape, yV.shape)

    model = linear_model.Ridge(alpha)
    kfold = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)

    sample_errors, params_per_fold = [], []
    held_out = yV.copy()
    for fit_rows, eval_rows in kfold.split(xM):
        model.fit(xM[fit_rows, :], yV[fit_rows])

        y_hat = model.predict(xM[eval_rows, :])
        held_out[eval_rows] = y_hat

        params_per_fold.append((model.coef_, model.intercept_))

        residual = np.array(yV[eval_rows])[:, 0] - np.array(y_hat)[:, 0]
        sample_errors.extend(np.abs(residual).tolist())

    return sample_errors, params_per_fold, held_out.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full( xM, yV, n_splits = 5, shuffle=True, disp = False):
    """
    K-fold cross-validation of linear_model.LinearRegression reporting per-sample absolute errors.

    No scoring argument is taken: instead of one score per fold, the first
    return value collects |y - y_pred| for every held-out sample.

    Returns (sample_errors, ci_l, yVp_list):
        sample_errors : absolute errors of all test samples, fold after fold
        ci_l : (coef_, intercept_) recorded after each fold's fit
        yVp_list : .A1.tolist() of a copy of yV in which each test entry was
            replaced by its held-out prediction
    Truth and prediction blocks are sliced with [:, 0], so both are expected
    to be two-dimensional (presumably numpy matrices - confirm).
    """
    if disp:
        print(xM.shape, yV.shape)

    model = linear_model.LinearRegression()
    kfold = model_selection.KFold(n_splits=n_splits, shuffle=shuffle)

    sample_errors, params_per_fold = [], []
    held_out = yV.copy()
    for fit_rows, eval_rows in kfold.split(xM):
        model.fit(xM[fit_rows, :], yV[fit_rows])

        y_hat = model.predict(xM[eval_rows, :])
        held_out[eval_rows] = y_hat

        params_per_fold.append((model.coef_, model.intercept_))

        residual = np.array(yV[eval_rows])[:, 0] - np.array(y_hat)[:, 0]
        sample_errors.extend(np.abs(residual).tolist())

    return sample_errors, params_per_fold, held_out.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def estimate_accuracy4(yEv, yEv_calc, disp = False):
    """
    Compute four regression metrics between yEv and yEv_calc.

    Returns the tuple (r_sqr, RMSE, MAE, DAE): metrics.r2_score, the square
    root of metrics.mean_squared_error, metrics.mean_absolute_error and
    metrics.median_absolute_error, each called as (yEv, yEv_calc).
    When disp is true the four values are also printed.
    """
    truth_and_estimate = (yEv, yEv_calc)
    result = (
        metrics.r2_score(*truth_and_estimate),
        np.sqrt(metrics.mean_squared_error(*truth_and_estimate)),
        metrics.mean_absolute_error(*truth_and_estimate),
        metrics.median_absolute_error(*truth_and_estimate),
    )

    if disp:
        print("r^2={0:.2e}, RMSE={1:.2e}, MAE={2:.2e}, DAE={3:.2e}".format(*result))

    return result
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Cross-validate linear_model.LinearRegression, keeping fold scores and fitted parameters.

    Parameters
    ----------
    xM : feature data, indexed per fold as xM[idx, :]
    yV : target data, indexed per fold as yV[idx] (treated as a vector, not a matrix)
    n_folds : number of KFold splits (passed as n_splits, shuffle=True)
    scoring : only 'median_absolute_error' is supported
        (scored with metrics.median_absolute_error(y_true, y_pred))
    disp : if True, print the input shapes and the mean/std of the fold scores

    Returns
    -------
    (cv_score_l, ci_l)
        cv_score_l : median absolute error of every fold
        ci_l : (clf.coef_, clf.intercept_) of every fold

    Raises
    ------
    ValueError if scoring is not supported. This is checked before any model
    is fitted, so an unsupported value no longer costs a wasted fit.
    """

    if disp:
        print(xM.shape, yV.shape)

    # Fail fast: the check does not depend on the fold, so do it once up front.
    if scoring != 'median_absolute_error':
        raise ValueError("{} scoring is not supported.".format(scoring))

    clf = linear_model.LinearRegression()
    kf5_c = model_selection.KFold(n_splits=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    for train, test in kf5_c.split(xM):
        clf.fit(xM[train, :], yV[train])

        yVp_test = clf.predict(xM[test, :])

        # Additionally, coef_ and intercept_ are stored.
        ci_l.append((clf.coef_, clf.intercept_))
        cv_score_l.append(
            metrics.median_absolute_error(yV[test], yVp_test))

    if disp:  # Only when this flag is on is the summary displayed.
        print('{}: mean, std -->'.format(scoring),
              np.mean(cv_score_l), np.std(cv_score_l))

    return cv_score_l, ci_l
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full_Ridge(xM, yV, alpha, n_folds=5, shuffle=True, disp=False):
    """
    Cross-validate linear_model.Ridge(alpha) and return the absolute error of every held-out sample.

    There is no scoring argument: the "score" list holds |y - y_pred| for
    each test sample instead of one value per fold.

    Returns (abs_errors, ci_l, yVp_list):
        abs_errors : per-sample absolute errors, concatenated in fold order
        ci_l : (coef_, intercept_) of the model after each fold's fit
        yVp_list : .A1.tolist() of a copy of yV whose test entries were
            overwritten with the held-out predictions
    yV[test] and the predictions are read through [:, 0], i.e. they are
    expected to be two-dimensional (presumably numpy matrices - confirm).
    """
    if disp:
        print(xM.shape, yV.shape)

    ridge = linear_model.Ridge(alpha)
    splitter = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)

    abs_errors = []
    coef_intercepts = []
    predictions = yV.copy()
    for train_idx, test_idx in splitter.split(xM):
        ridge.fit(xM[train_idx, :], yV[train_idx])

        fold_pred = ridge.predict(xM[test_idx, :])
        predictions[test_idx] = fold_pred

        coef_intercepts.append((ridge.coef_, ridge.intercept_))

        # Compare the first column of truth and prediction sample by sample.
        truth_col = np.array(yV[test_idx])[:, 0]
        pred_col = np.array(fold_pred)[:, 0]
        abs_errors += np.abs(truth_col - pred_col).tolist()

    return abs_errors, coef_intercepts, predictions.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full(xM, yV, n_folds=5, shuffle=True, disp=False):
    """
    K-fold cross-validation of a linear regression with per-sample errors.

    One linear_model.LinearRegression estimator is refitted on the training
    rows of every split produced by model_selection.KFold and evaluated on
    the held-out rows.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :].
        yV: target values. It must support .copy(), the copy must expose .A1
            (as numpy.matrix does), and yV[rows] must be two-dimensional
            because its first column is extracted.
        n_folds: number of splits given to KFold.
        shuffle: forwarded to KFold.
        disp: when true, the shapes of xM and yV are printed.

    Returns:
        A 3-tuple (abs_errors, fits, predictions):
        abs_errors  - flat list of |y - y_pred| for every held-out sample,
                      concatenated in fold order;
        fits        - list of (coef_, intercept_) tuples, one per fold;
        predictions - out-of-fold predictions flattened with .A1.tolist().
    """
    if disp:
        print(xM.shape, yV.shape)

    regressor = linear_model.LinearRegression()
    splitter = model_selection.KFold(n_splits=n_folds, shuffle=shuffle)

    abs_errors = []
    fits = []
    yVp = yV.copy()
    for train_idx, test_idx in splitter.split(xM):
        # yV is indexed with a single index (vector style), not yV[rows, :].
        regressor.fit(xM[train_idx, :], yV[train_idx])
        fold_pred = regressor.predict(xM[test_idx, :])
        yVp[test_idx] = fold_pred

        # Keep the fitted parameters of this fold.
        fits.append((regressor.coef_, regressor.intercept_))

        truth_col = np.array(yV[test_idx])[:, 0]
        pred_col = np.array(fold_pred)[:, 0]
        abs_errors.extend(np.abs(truth_col - pred_col).tolist())

    return abs_errors, fits, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_It(xM, yV, n_folds=5, scoring='median_absolute_error', N_it=10, disp=False, ldisp=False):
    """
    Repeat cv_LinearRegression_ci_pred N_it times and pool the results.

    Parameters:
        xM, yV, n_folds, scoring: forwarded unchanged to
            cv_LinearRegression_ci_pred on every repetition.
        N_it: number of repetitions.
        disp: forwarded to each repetition; also enables the summary line.
        ldisp: enables the summary line without turning on the
            per-repetition output.

    Returns:
        dict with the keys
        'mean' - np.mean of all pooled scores,
        'std'  - np.std of all pooled scores,
        'list' - the pooled scores of every repetition,
        'ci'   - the pooled (coef_, intercept_) entries of every repetition,
        'yVp'  - list holding the prediction list of each repetition.
    """
    pooled_scores = []
    pooled_fits = []
    predictions_per_run = []  # plain lists, one per repetition
    for _ in range(N_it):
        run_scores, run_fits, run_pred = cv_LinearRegression_ci_pred(
            xM, yV, n_folds=n_folds, scoring=scoring, disp=disp)
        pooled_scores.extend(run_scores)
        pooled_fits.extend(run_fits)
        predictions_per_run.append(run_pred)

    o_d = {
        'mean': np.mean(pooled_scores),
        'std': np.std(pooled_scores),
        'list': pooled_scores,
        'ci': pooled_fits,
        'yVp': predictions_per_run,
    }

    if disp or ldisp:
        summary = '{0}: mean(+/-std) --> {1}(+/-{2})'.format(
            scoring, o_d['mean'], o_d['std'])
        print(summary)

    return o_d
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression( xM, yV, n_folds = 5, scoring = 'median_absolute_error', disp = False):
    """
    Shuffled K-fold cross-validation of an ordinary linear regression.

    For every (train, test) pair yielded by cross_validation.KFold the model
    is refitted on the training rows and scored on the held-out rows with
    metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values, sliced as yV[rows].
        n_folds: number of folds handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        List with one median absolute error per fold.

    Raises:
        ValueError: for any other scoring value (raised after the first fold
            has been fitted).
    """
    if disp:
        print(xM.shape, yV.shape)

    regressor = linear_model.LinearRegression()
    folds = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)

    fold_scores = []
    for train_idx, test_idx in folds:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        regressor.fit( xM[train_idx,:], yV[train_idx])
        predicted = regressor.predict( xM[test_idx,:])

        if scoring != 'median_absolute_error':
            raise ValueError( "{} scoring is not supported.".format( scoring))
        fold_scores.append( metrics.median_absolute_error(yV[test_idx], predicted))

    if disp:
        # Summary is printed only when display is requested.
        header = '{}: mean, std -->'.format( scoring)
        print(header, np.mean( fold_scores), np.std( fold_scores))

    return fold_scores
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci( xM, yV, n_folds = 5, scoring = 'median_absolute_error', disp = False):
    """
    Shuffled K-fold cross-validation of a linear regression, keeping each fit.

    For every (train, test) pair yielded by cross_validation.KFold the model
    is refitted, its coefficients and intercept are recorded, and the
    held-out rows are scored with metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values, sliced as yV[rows].
        n_folds: number of folds handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        (scores, fits): one median absolute error per fold, and one
        (coef_, intercept_) tuple per fold.

    Raises:
        ValueError: for any other scoring value (raised after the first fold
            has been fitted and recorded).
    """
    if disp:
        print(xM.shape, yV.shape)

    regressor = linear_model.LinearRegression()
    folds = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)

    fold_scores = []
    fits = []
    for train_idx, test_idx in folds:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        regressor.fit( xM[train_idx,:], yV[train_idx])
        predicted = regressor.predict( xM[test_idx,:])

        # Record the fitted parameters before scoring.
        fits.append( (regressor.coef_, regressor.intercept_))

        if scoring != 'median_absolute_error':
            raise ValueError( "{} scoring is not supported.".format( scoring))
        fold_scores.append( metrics.median_absolute_error(yV[test_idx], predicted))

    if disp:
        header = '{}: mean, std -->'.format( scoring)
        print(header, np.mean( fold_scores), np.std( fold_scores))

    return fold_scores, fits
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred( xM, yV, n_folds = 5, scoring = 'median_absolute_error', disp = False):
    """
    Shuffled K-fold cross-validation of a linear regression that also returns
    the fitted parameters and the out-of-fold predictions.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values, sliced as yV[rows]. It must support .copy() and the
            copy must expose .A1 (as numpy.matrix does).
        n_folds: number of folds handed to cross_validation.KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        (scores, fits, predictions): one median absolute error per fold, one
        (coef_, intercept_) tuple per fold, and the out-of-fold predictions
        flattened with .A1.tolist().

    Raises:
        ValueError: for any other scoring value (raised after the first fold
            has been fitted).
    """

    if disp:
        print(xM.shape, yV.shape)

    clf = linear_model.LinearRegression()
    kf5 = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)

    cv_score_l = list()
    ci_l = list()
    yVp = yV.copy()
    for train, test in kf5:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        clf.fit( xM[train,:], yV[train])

        yVp_test = clf.predict( xM[test,:])
        yVp[test] = yVp_test

        # The estimator attributes are stored as-is; the list conversions that
        # used to be computed here were never used and have been dropped.
        ci_l.append( (clf.coef_, clf.intercept_))
        if scoring == 'median_absolute_error':
            cv_score_l.append( metrics.median_absolute_error(yV[test], yVp_test))
        else:
            raise ValueError( "{} scoring is not supported.".format( scoring))

    if disp: # Summary is printed only when display is requested.
        print('{}: mean, std -->'.format( scoring), np.mean( cv_score_l), np.std( cv_score_l))

    return cv_score_l, ci_l, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full_Ridge( xM, yV, alpha, n_folds = 5, shuffle=True, disp = False):
    """
    K-fold cross-validation of a Ridge regressor with per-sample errors.

    One linear_model.Ridge(alpha) estimator is refitted on the training rows
    of every (train, test) pair yielded by cross_validation.KFold and
    evaluated on the held-out rows.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values. It must support .copy(), the copy must expose .A1
            (as numpy.matrix does), and yV[rows] must be two-dimensional
            because its first column is extracted.
        alpha: passed positionally to linear_model.Ridge.
        n_folds: number of folds handed to KFold.
        shuffle: forwarded to KFold.
        disp: when true, the shapes of xM and yV are printed.

    Returns:
        (abs_errors, fits, predictions): flat list of |y - y_pred| for every
        held-out sample in fold order, one (coef_, intercept_) tuple per
        fold, and the out-of-fold predictions flattened with .A1.tolist().
    """
    if disp:
        print(xM.shape, yV.shape)

    ridge = linear_model.Ridge( alpha)
    folds = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    abs_errors = []
    fits = []
    yVp = yV.copy()
    for train_idx, test_idx in folds:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        ridge.fit( xM[train_idx,:], yV[train_idx])
        fold_pred = ridge.predict( xM[test_idx,:])
        yVp[test_idx] = fold_pred

        # Keep the fitted parameters of this fold.
        fits.append( (ridge.coef_, ridge.intercept_))

        truth_col = np.array( yV[test_idx])[:,0]
        pred_col = np.array( fold_pred)[:,0]
        abs_errors.extend( np.abs(truth_col - pred_col).tolist())

    return abs_errors, fits, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression( xM, yV, n_folds = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate a linear regression and return one score per fold.

    The folds come from cross_validation.KFold with shuffling enabled; every
    fold refits the same estimator object and scores the held-out rows with
    metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values, sliced as yV[rows].
        n_folds: number of folds handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        List with one median absolute error per fold.

    Raises:
        ValueError: for any other scoring value (raised after the first fold
            has been fitted).
    """
    if disp:
        print(xM.shape, yV.shape)

    estimator = linear_model.LinearRegression()
    splits = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)

    scores = []
    for fit_rows, eval_rows in splits:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        estimator.fit( xM[fit_rows,:], yV[fit_rows])
        eval_pred = estimator.predict( xM[eval_rows,:])

        if scoring != 'median_absolute_error':
            raise ValueError( "{} scoring is not supported.".format( scoring))
        fold_score = metrics.median_absolute_error(yV[eval_rows], eval_pred)
        scores.append( fold_score)

    if disp:
        # Summary is printed only when display is requested.
        print('{}: mean, std -->'.format( scoring), np.mean( scores), np.std( scores))

    return scores
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci( xM, yV, n_folds = 5, scoring = 'median_absolute_error', disp = False):
    """
    Cross-validate a linear regression, returning fold scores and fitted
    parameters.

    The folds come from cross_validation.KFold with shuffling enabled; every
    fold refits the same estimator object, records its coef_ and intercept_,
    and scores the held-out rows with metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values, sliced as yV[rows].
        n_folds: number of folds handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        (scores, fits): one median absolute error per fold, and one
        (coef_, intercept_) tuple per fold.

    Raises:
        ValueError: for any other scoring value (raised after the first fold
            has been fitted and recorded).
    """
    if disp:
        print(xM.shape, yV.shape)

    estimator = linear_model.LinearRegression()
    splits = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=True)

    scores = []
    fits = []
    for fit_rows, eval_rows in splits:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        estimator.fit( xM[fit_rows,:], yV[fit_rows])
        eval_pred = estimator.predict( xM[eval_rows,:])

        # Record the fitted parameters before scoring.
        fits.append( (estimator.coef_, estimator.intercept_))

        if scoring != 'median_absolute_error':
            raise ValueError( "{} scoring is not supported.".format( scoring))
        fold_score = metrics.median_absolute_error(yV[eval_rows], eval_pred)
        scores.append( fold_score)

    if disp:
        print('{}: mean, std -->'.format( scoring), np.mean( scores), np.std( scores))

    return scores, fits
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full_Ridge( xM, yV, alpha, n_folds = 5, shuffle=True, disp = False):
    """
    Cross-validate a Ridge regressor and report the absolute error of every
    held-out sample.

    The folds come from cross_validation.KFold; every fold refits the same
    linear_model.Ridge(alpha) object.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values. It must support .copy(), the copy must expose .A1
            (as numpy.matrix does), and yV[rows] must be two-dimensional
            because its first column is extracted.
        alpha: passed positionally to linear_model.Ridge.
        n_folds: number of folds handed to KFold.
        shuffle: forwarded to KFold.
        disp: when true, the shapes of xM and yV are printed.

    Returns:
        (abs_errors, fits, predictions): flat list of |y - y_pred| for every
        held-out sample in fold order, one (coef_, intercept_) tuple per
        fold, and the out-of-fold predictions flattened with .A1.tolist().
    """
    if disp:
        print(xM.shape, yV.shape)

    estimator = linear_model.Ridge( alpha)
    splits = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    sample_errors = []
    fits = []
    yVp = yV.copy()
    for fit_rows, eval_rows in splits:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        estimator.fit( xM[fit_rows,:], yV[fit_rows])
        eval_pred = estimator.predict( xM[eval_rows,:])
        yVp[eval_rows] = eval_pred

        # Keep the fitted parameters of this fold.
        fits.append( (estimator.coef_, estimator.intercept_))

        observed = np.array( yV[eval_rows])[:,0]
        estimated = np.array( eval_pred)[:,0]
        residual_abs = np.abs(observed - estimated)
        sample_errors.extend( residual_abs.tolist())

    return sample_errors, fits, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci_pred_full( xM, yV, n_folds = 5, shuffle=True, disp = False):
    """
    Cross-validate a linear regression and report the absolute error of every
    held-out sample.

    The folds come from cross_validation.KFold; every fold refits the same
    linear_model.LinearRegression object.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :]; xM.shape[0] is passed to KFold.
        yV: target values. It must support .copy(), the copy must expose .A1
            (as numpy.matrix does), and yV[rows] must be two-dimensional
            because its first column is extracted.
        n_folds: number of folds handed to KFold.
        shuffle: forwarded to KFold.
        disp: when true, the shapes of xM and yV are printed.

    Returns:
        (abs_errors, fits, predictions): flat list of |y - y_pred| for every
        held-out sample in fold order, one (coef_, intercept_) tuple per
        fold, and the out-of-fold predictions flattened with .A1.tolist().
    """
    if disp:
        print(xM.shape, yV.shape)

    estimator = linear_model.LinearRegression()
    splits = cross_validation.KFold( xM.shape[0], n_folds=n_folds, shuffle=shuffle)

    sample_errors = []
    fits = []
    yVp = yV.copy()
    for fit_rows, eval_rows in splits:
        # yV is indexed with a single index (vector style), not yV[rows, :].
        estimator.fit( xM[fit_rows,:], yV[fit_rows])
        eval_pred = estimator.predict( xM[eval_rows,:])
        yVp[eval_rows] = eval_pred

        # Keep the fitted parameters of this fold.
        fits.append( (estimator.coef_, estimator.intercept_))

        observed = np.array( yV[eval_rows])[:,0]
        estimated = np.array( eval_pred)[:,0]
        residual_abs = np.abs(observed - estimated)
        sample_errors.extend( residual_abs.tolist())

    return sample_errors, fits, yVp.A1.tolist()
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def _make_scoring_r0( scoring):
    """
    Translate a scoring name into a scorer built with metrics.make_scorer.

    'r2' wraps metrics.r2_score with the default settings. The names
    'mean_absolute_error', 'mean_squared_error' and 'median_absolute_error'
    wrap the metrics function of the same name with greater_is_better=False.

    Raises:
        ValueError: if scoring is none of the four supported names.
    """
    if scoring == 'r2':
        return metrics.make_scorer( metrics.r2_score)

    # The loss-type metrics share the same construction and are checked in
    # a fixed order.
    for loss_name in ('mean_absolute_error', 'mean_squared_error', 'median_absolute_error'):
        if scoring == loss_name:
            return metrics.make_scorer( getattr( metrics, loss_name), greater_is_better=False)

    raise ValueError("Not supported scoring")
项目:InnerOuterRNN    作者:Chemoinformatics    | 项目源码 | 文件源码
def eval_metrics_on(predictions, labels, regression = True):
    '''
    Score predictions against labels and return the results as a dictionary.

    labels may be given as a sequence of (data, labels) pairs: when the first
    element has length 2, the second item of every element is concatenated
    into a single label array.
    NOTE(review): a plain label array whose rows have length 2 also satisfies
    that test - confirm callers never pass one.

    regression=True:
        uses column 0 of predictions and returns the keys
        r2, mean_abs_error, mse, rmse, median_absolute_error,
        explained_variance_score, plus main_metric (equal to rmse).

    regression=False:
        uses column 1 of predictions and column 1 of labels and returns the
        keys auc, accuracy (predictions thresholded at 0.5) and main_metric
        (equal to accuracy). auc is NaN when labels.max() is not 1.
    '''
    if len(labels[0])==2: #labels is list of data/labels pairs
        labels = np.concatenate([pair[1] for pair in labels])
    if regression:
        predictions = predictions[:,0]
        r2                       = metrics.r2_score(labels, predictions)
        mean_abs_error           = np.abs(predictions - labels).mean()
        mse                      = ((predictions - labels)**2).mean()
        rmse                     = np.sqrt(mse)
        median_absolute_error    = metrics.median_absolute_error(labels, predictions) # robust to outliers
        explained_variance_score = metrics.explained_variance_score(labels, predictions) # best score = 1, lower is worse
        return {'r2':r2, 'mean_abs_error':mean_abs_error, 'mse':mse, 'rmse':rmse,
                'median_absolute_error':median_absolute_error, 'explained_variance_score':explained_variance_score, 'main_metric':rmse}
    else:
        predictions = predictions[:,1]

        # Default value so the return statement below never references an
        # unassigned name when the AUC is not computed.
        auc = np.nan
        if labels.max()==1:
            # NOTE(review): metrics.auc integrates arbitrary (x, y) points and
            # the `reorder` argument appears to be gone from newer scikit-learn
            # releases - confirm against the pinned version; roc_auc_score is
            # probably what is wanted here.
            auc    = metrics.auc(predictions, labels[:,1], reorder=1)
        accuracy = np.mean((predictions>0.5)==labels[:,1])

        return {'auc':auc, 'accuracy':accuracy, 'main_metric':accuracy}
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_regression_metrics(n_samples=50):
    """Check the regression metrics on predictions that are off by exactly one.

    With y_pred = y_true + 1 every error equals 1, so the three error
    metrics must return 1 and the explained variance must be 1; r2 is
    compared with 0.995 to two decimals.
    """
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    unit_error_metrics = (mean_squared_error, mean_absolute_error, median_absolute_error)
    for error_metric in unit_error_metrics:
        assert_almost_equal(error_metric(y_true, y_pred), 1.)

    r2 = r2_score(y_true, y_pred)
    assert_almost_equal(r2, 0.995, 2)

    explained = explained_variance_score(y_true, y_pred)
    assert_almost_equal(explained, 1.)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_regression_metrics_at_limits():
    """Check the regression metrics on perfect predictions.

    The three error metrics must be 0 and both score metrics must be 1,
    each compared to two decimals.
    """
    for error_metric in (mean_squared_error, mean_absolute_error, median_absolute_error):
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    explained = explained_variance_score([0.], [0.])
    assert_almost_equal(explained, 1.00, 2)

    r2 = r2_score([0., 1], [0., 1])
    assert_almost_equal(r2, 1.00, 2)
项目:ML-Predictions    作者:ltfschoen    | 项目源码 | 文件源码
def process_linear_regression(self):
        """ Linear Regression

        Fit the model returned by `prediction_utils.generate_model` on the
        listings DataFrame, predict on the same rows and print the error of
        those predictions.

          - where `input` is matrix with:
            - rows - `n_samples`
            - columns - `n_features`
          - where `output` is:
            - array of `n_samples` when predicting one output
            - matrix of `n_samples` rows and `n_outputs` columns when predicting multiple outputs simultaneously

          - Important Note:
            - Selecting with a list of column names (`df[["mpg"]]`) yields a
              two-dimensional object (n_samples rows, 1 column), which is the
              shape expected for `input`.
            - Selecting with a single name (`df["mpg"]`) yields a
              one-dimensional Series, which is not suitable as `input`.

        Side effects:
          - adds a "predictions" column to `prediction_data.df_listings`
          - stores the model type and RMSE under
            `self.response["pre-hyperparameter_optimisation"]`
          - optionally plots, depending on the `prediction_config` flags

        Returns None in every case; when the selected training data is empty
        the method returns before fitting.
        """
        print("Linear Regression in progress...")

        model = self.prediction_utils.generate_model(self.model_type, None, None, None)
        df = self.prediction_data.df_listings
        inputs = df[self.training_columns]
        # DataFrame.empty is true when either axis has length zero, so this
        # detects an empty column selection as well as an empty table
        # (len() would only count the rows).
        if inputs.empty:
            print("No Training Columns to use for Linear Regression. Perhaps they were all bad and removed.")
            return None

        # Show that the inputs are two-dimensional (rows, columns)
        print("Shape of inputs to Scikit-Learn Fit function: ", inputs.values.shape)
        output = df[self.target_column]
        model.fit(inputs, output)
        predictions = model.predict(inputs)
        df["predictions"] = predictions
        if self.prediction_config.PLOT_LINEAR_RELATIONSHIP_PREDICTION_VS_ACTUAL_FOR_TRAIN_FEATURES_VS_TARGET == True:
            self.plot_linear_relationships(predictions)
        print("Check predictions accuracy against 'known' Model Training Data:\n %r" % (df[[self.target_column, "predictions"]]))

        print("Predictions using Scikit-Learn Linear Regression: %r" % (predictions) )

        # NOTE: the value reported as "MAE" is the median absolute error.
        mae = median_absolute_error(df[self.target_column], predictions)
        # multioutput='raw_values' yields one MSE per output; the first entry
        # is the one that is printed and turned into the RMSE.
        mse = mean_squared_error(df[self.target_column], predictions, multioutput='raw_values')
        rmse = math.sqrt(mse[0])

        print("MAE: %r" % (mae) )
        print("MSE: %r" % (mse[0]) )
        print("RMSE: %r" % (rmse) )

        if mae and rmse:
            mae_rmse_ratio_prefix = mae / rmse
            print("MAE to RMSE Ratio using Linear Regression: %.2f:1" % (mae_rmse_ratio_prefix) )

        if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET == True:
            for training_model_feature_name in self.training_columns:
                self.prediction_utils.plot(training_model_feature_name, df)

        self.response["pre-hyperparameter_optimisation"] = {
            "model_type": self.model_type,
            "rmse": rmse
        }
        print("Linear Regression Pre-Hyperparameter k Optimisation results: %r" % (self.response))
项目:ML-Predictions    作者:ltfschoen    | 项目源码 | 文件源码
def process_hyperparameter_fixed(self):
        """
        Train one model with the fixed hyperparameter and evaluate it on the
        testing partition.

        Step 1: Obtain a model from `prediction_utils.generate_model`, passing
            the model type, `prediction_config.HYPERPARAMETER_FIXED`, 'brute'
            and 2 (presumably a K-Nearest-Neighbors model where p=2 selects
            Euclidean distance - confirm against generate_model).
        Step 2: Fit the model on the training partition:
            - X: the training columns of `prediction_data.training_part`
            - y: the target column of `prediction_data.training_part`

            Warning: X and y should not contain missing or non-numerical
            values, otherwise fitting fails.

        Step 3: Predict the target for the training columns of
            `prediction_data.testing_part`.

        Step 4: Compute and print the median absolute error (labelled "MAE"),
            the MSE and the RMSE of those predictions; 0 is the best value
            for each.

        Returns a dict with the keys "feature_names" (training column names
        joined with "__"), "rmse", "k_neighbors_qty" (the fixed
        hyperparameter), "k_folds_qty" (None) and
        "k_fold_cross_validation_toggle" (False).
        """
        print("Training features include: %r" % (self.training_columns) )

        training_column_names = self.training_columns
        feature_combo = '__'.join(training_column_names)

        model = self.prediction_utils.generate_model(self.model_type, self.prediction_config.HYPERPARAMETER_FIXED, 'brute', 2)

        _temp_training_part = self.prediction_data.training_part
        X = _temp_training_part[self.training_columns]
        y = _temp_training_part[self.target_column]

        model.fit(X, y)

        _temp_testing_part = self.prediction_data.testing_part
        predictions = model.predict(_temp_testing_part[self.training_columns])

        print("Predictions using Scikit-Learn KNN Regression: %r" % (predictions) )

        # NOTE: the value reported as "MAE" is the median absolute error.
        mae = median_absolute_error(_temp_testing_part[self.target_column], predictions)
        # multioutput='raw_values' yields one MSE per output; the first entry
        # is the one that is printed and turned into the RMSE.
        mse = mean_squared_error(_temp_testing_part[self.target_column], predictions, multioutput='raw_values')
        rmse = math.sqrt(mse[0])

        print("MAE: %r" % (mae) )
        print("MSE: %r" % (mse[0]) )
        print("RMSE: %r" % (rmse) )

        if mae and rmse:
            mae_rmse_ratio_prefix = mae / rmse
            print("MAE to RMSE Ratio: %.2f:1" % (mae_rmse_ratio_prefix) )

        if self.prediction_config.PLOT_INDIVIDUAL_TRAIN_FEATURES_VS_TARGET == True:
            for training_model_feature_name in self.training_columns:
                self.prediction_utils.plot(training_model_feature_name, _temp_testing_part)

        return {
            "feature_names": feature_combo,
            "rmse": rmse,
            "k_neighbors_qty": self.prediction_config.HYPERPARAMETER_FIXED,
            "k_folds_qty": None,
            "k_fold_cross_validation_toggle": False
        }
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression_ci( xM, yV, n_splits = 5, scoring = 'median_absolute_error', disp = False):
    """
    Shuffled K-fold cross-validation of a linear regression, keeping each fit.

    The splits come from model_selection.KFold(...).split(xM); every split
    refits the same estimator object, records its coef_ and intercept_, and
    scores the held-out rows with metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :].
        yV: target values, sliced as yV[rows].
        n_splits: number of splits handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        (scores, fits): one median absolute error per split, and one
        (coef_, intercept_) tuple per split.

    Raises:
        ValueError: for any other scoring value (raised after the first split
            has been fitted and recorded).
    """
    if disp:
        print(xM.shape, yV.shape)

    regressor = linear_model.LinearRegression()
    splitter = model_selection.KFold( n_splits=n_splits, shuffle=True)

    split_scores = []
    fits = []
    for train_idx, test_idx in splitter.split( xM):
        # yV is indexed with a single index (vector style), not yV[rows, :].
        regressor.fit( xM[train_idx,:], yV[train_idx])
        predicted = regressor.predict( xM[test_idx,:])

        # Record the fitted parameters before scoring.
        fits.append( (regressor.coef_, regressor.intercept_))

        if scoring != 'median_absolute_error':
            raise ValueError( "{} scoring is not supported.".format( scoring))
        split_scores.append( metrics.median_absolute_error(yV[test_idx], predicted))

    if disp:
        header = '{}: mean, std -->'.format( scoring)
        print(header, np.mean( split_scores), np.std( split_scores))

    return split_scores, fits
项目:jamespy_py3    作者:jskDr    | 项目源码 | 文件源码
def cv_LinearRegression(xM, yV, n_folds=5, scoring='median_absolute_error', disp=False):
    """
    Shuffled K-fold cross-validation of an ordinary linear regression.

    The splits come from model_selection.KFold(...).split(xM); every split
    refits the same estimator object and scores the held-out rows with
    metrics.median_absolute_error.

    Parameters:
        xM: feature matrix, sliced as xM[rows, :].
        yV: target values, sliced as yV[rows].
        n_folds: number of splits handed to KFold.
        scoring: only 'median_absolute_error' is accepted.
        disp: when true, print the input shapes and the mean/std of the scores.

    Returns:
        List with one median absolute error per split.

    Raises:
        ValueError: for any other scoring value (raised after the first split
            has been fitted).
    """
    if disp:
        print(xM.shape, yV.shape)

    regressor = linear_model.LinearRegression()
    splitter = model_selection.KFold(n_splits=n_folds, shuffle=True)

    split_scores = []
    for train_idx, test_idx in splitter.split(xM):
        # yV is indexed with a single index (vector style), not yV[rows, :].
        regressor.fit(xM[train_idx, :], yV[train_idx])
        predicted = regressor.predict(xM[test_idx, :])

        if scoring != 'median_absolute_error':
            raise ValueError("{} scoring is not supported.".format(scoring))
        split_scores.append(
            metrics.median_absolute_error(yV[test_idx], predicted))

    if disp:
        # Summary is printed only when display is requested.
        header = '{}: mean, std -->'.format(scoring)
        print(header, np.mean(split_scores), np.std(split_scores))

    return split_scores