Python scipy.stats 模块，ranksums() 实例源码

我们从Python开源项目中，提取了以下6个代码示例，用于说明如何使用scipy.stats.ranksums()。

项目：pscore_match 作者：kellieotto | 项目源码 | 文件源码

def rank_test(covariates, groups):
    """
    Wilcoxon rank sum test for the distribution of treatment and control covariates.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be
        included as additional rows.
    groups : array-like
        Treatment assignments, one per row of ``covariates``. Rows labelled
        1 are compared against rows labelled 0; must be 2 groups.

    Returns
    -------
    numpy.ndarray
        p-values, one for each column in covariates (in column order).
    """
    # Comparing a plain list/tuple with ``== 1`` yields a single bool rather
    # than an element-wise mask, so promote plain sequences to an ndarray.
    # Series/ndarray inputs are left untouched to keep their existing
    # indexing behaviour.
    if isinstance(groups, (list, tuple)):
        groups = np.asarray(groups)

    # The masks do not depend on the column, so build them once.
    treated = groups == 1
    control = groups == 0

    colnames = list(covariates.columns)
    pvalues = np.zeros(len(colnames))
    for j, name in enumerate(colnames):
        var = covariates[name]
        pvalues[j] = ranksums(var[treated], var[control]).pvalue
    return pvalues

项目：facial-emotion-detection-dl 作者：dllatas | 项目源码 | 文件源码

def main():
    """
    Compare two hard-coded samples with a Wilcoxon rank-sum test and a
    one-way ANOVA, and print both p-values on a single line.

    The samples used below follow the "2nd phase" heading of the original
    notes. The earlier values are kept here for reference:

    1st phase
    top1 = [70.0, 71.1, 72.5, 70.8, 68.1, 71.9, 71.1, 71.3, 68.4, 70.2]
    top3 = [75.8, 78.4, 77.8, 77.7, 80.0, 77.8, 78.7, 76.4, 79.1, 77.3]
    2nd phase
    """
    x = [53.6, 54.5, 53.7, 52.7, 53.1, 55.5, 55.5, 52.8, 53.7, 52.7]
    y = [89.7, 89.1, 89.5, 88.7, 89.4, 88.6, 89.8, 89.5, 89.2, 89.7]
    # Compute the Wilcoxon rank-sum statistic for two samples.
    wilcoxon = stats.ranksums(x, y)
    anova = stats.f_oneway(x, y)
    # Index 1 of each result is the p-value. A single-argument print(...)
    # call is valid on both Python 2 and Python 3.
    print("Wilcoxon: " + str(wilcoxon[1]) + "; ANOVA: " + str(anova[1]))

项目：microbiomeHD 作者：cduvallet | 项目源码 | 文件源码

def get_layered_pvals(df, groupcol, valuecol, subset_by,
                      pval_method='kruskalwallis'):
    """
    Compute pairwise p-values within each subset of a dataframe.

    The dataframe is split with ``df.groupby(subset_by)`` and
    ``get_all_pvals()`` is run on every piece, so this is a thin wrapper
    around those two steps.

    Parameters
    ----------
    df : pandas dataframe
        tidy dataframe with labels in `groupcol` and values in `valuecol`
    groupcol, valuecol : str
        columns in df
    subset_by : str
        column to group by
    pval_method : str {'kruskalwallis', 'ranksums', 'wilcoxon', 'ttest_ind'}
        statistical method for comparison, forwarded to get_all_pvals()
        as `method`. Default is 'kruskalwallis'

    Returns
    -------
    pvals : dict
        two-level dictionary: outer keys are the group keys produced by
        df.groupby(subset_by), inner values are the dictionaries returned
        by get_all_pvals() for that subset
    """
    return {
        subset_key: get_all_pvals(subset_df, groupcol, valuecol,
                                  method=pval_method)
        for subset_key, subset_df in df.groupby(subset_by)
    }

项目：microbiomeHD 作者：cduvallet | 项目源码 | 文件源码

def compare_otus_teststat(df, Xsmpls, Ysmpls, method='kruskal-wallis', multi_comp=None):
    """
    Compares columns between Xsmpls and Ysmpls, with statistical method=method.
    Returns dataframe with both the p-values ('p') and test statistic
    ('test-stat'); a 'q' column is added when multi_comp == 'fdr'.

    parameters
    ----------
    df             dataframe, samples are in rows and OTUs in columns
    X,Ysmpls       list of samples (row labels of df) to compare
    method         statistical method to use for comparison. One of
                   'kruskal-wallis', 'wilcoxon', 'ranksums' (same test as
                   'wilcoxon' here) or 'mann-whitney'
    multi_comp     str, type of multiple comparison test to do.
                   Currently accepts 'fdr' or None

    outputs
    -------
    results        dataframe with OTUs in rows and 'p' and 'test-stat' in
                   columns (plus 'q' when multi_comp == 'fdr')

    raises
    ------
    ValueError     if method is not one of the supported names
    """
    if method == 'kruskal-wallis':
        pfun = kruskalwallis
    elif method in ('wilcoxon', 'ranksums'):
        pfun = ranksums
    elif method == 'mann-whitney':
        pfun = mannwhitneyu
        # Note: prob wanna add some kwargs here to say whether 2sided or not
    else:
        # Previously an unknown method left pfun unbound; the resulting
        # NameError was swallowed below and every OTU silently got p=1.
        raise ValueError(
            "Unknown method {!r}; expected 'kruskal-wallis', 'wilcoxon', "
            "'ranksums' or 'mann-whitney'".format(method))

    results = pd.DataFrame(index=df.columns, columns=['test-stat', 'p'])
    for o in df.columns:
        try:
            h, p = pfun(df.loc[Xsmpls, o], df.loc[Ysmpls, o])
        except Exception:
            # Best effort: a test that cannot be computed for this OTU is
            # recorded as "no difference" instead of aborting the whole run.
            p = 1
            h = 0
        results.loc[o, 'p'] = p
        results.loc[o, 'test-stat'] = h

    if multi_comp == 'fdr':
        # Benjamini-Hochberg correction; only the corrected values are kept.
        _, results['q'], _, _ = multipletests(results['p'], method='fdr_bh')

    return results

项目：extract 作者：dblalock | 项目源码 | 文件源码

def computeRankSumZvalsPvals(errRates, lowIsBetter=True):
    """
    Run a rank-sum test between every pair of columns of errRates.

    For columns i < j the test is ranksums(column j, column i), restricted
    to the rows where neither column is NaN. The z-value is stored at
    [i, j] and its negation at [j, i]; the p-value is stored at both
    positions. The diagonal holds z = 0 and p = 1.

    Parameters
    ----------
    errRates : DataFrame
        One column per classifier (columns are the independent variable).
    lowIsBetter : bool
        Accepted but not used by this function.

    Returns
    -------
    (zvals, pvals) : tuple of DataFrames
        Square frames whose index and columns are both the column labels
        of the frame returned by computeRanks(errRates, onlyFullRows=False).
    """
    ranks = computeRanks(errRates, onlyFullRows=False)

    count = errRates.shape[1]
    zvals = np.empty((count, count))
    pvals = np.empty((count, count))
    # A classifier compared with itself: no shift, no evidence.
    np.fill_diagonal(zvals, 0)
    np.fill_diagonal(pvals, 1)

    for first in range(count):
        colFirst = errRates.iloc[:, first]
        for second in range(first + 1, count):
            colSecond = errRates.iloc[:, second]

            # only use the datasets (rows) both classifiers have results for
            keep = ~(np.isnan(colFirst) | np.isnan(colSecond))

            # cols are indep var
            z, p = ranksums(colSecond[keep], colFirst[keep])
            zvals[first, second] = z
            pvals[first, second] = p
            # mirrored entry: opposite direction, same significance
            zvals[second, first] = -zvals[first, second]
            pvals[second, first] = pvals[first, second]

    labels = ranks.columns.values
    zFrame = pd.DataFrame(data=zvals, index=labels, columns=labels)
    pFrame = pd.DataFrame(data=pvals, index=labels, columns=labels)
    return zFrame, pFrame

项目：microbiomeHD 作者：cduvallet | 项目源码 | 文件源码

def get_all_pvals(df, groupcol, valuecol, method='kruskalwallis'):
    """
    Returns pairwise p-values between all groups in the column `groupcol`.

    Parameters
    ----------
    df : pandas dataframe
        tidy dataframe with labels in `groupcol` and values in `valuecol`
    groupcol, valuecol : str
        columns in df
    method : str {'kruskalwallis', 'ranksums', 'wilcoxon', 'ttest_ind'}
        statistical method for comparison. Default is 'kruskalwallis';
        any unrecognised value also falls back to kruskalwallis.

    Returns
    -------
    pvals : dict
        dictionary with 'group1_vs_group2' as the keys and p-value as the
        values. Each unordered pair appears once, with group1 being the
        label that occurs first in df[groupcol]. If the test raises for a
        pair, its p-value is NaN.
    """

    ## Pick the test once; it does not depend on the pair being compared
    if method == 'ranksums' or method == 'wilcoxon':
        pfun = ranksums
    elif method == 'ttest_ind':
        pfun = ttest_ind
    else:
        pfun = kruskalwallis

    pvals = {}

    ## Get all pairwise combinations
    # unique() keeps first-appearance order, so the key names are the same
    # on every run (set() ordering of strings varies with hash seed).
    grps = list(df[groupcol].unique())
    for i, g1 in enumerate(grps):
        for g2 in grps[i + 1:]:
            ## Grab values
            x = df[df[groupcol] == g1][valuecol]
            y = df[df[groupcol] == g2][valuecol]

            ## Calculate p value
            try:
                _, p = pfun(x, y)
            except Exception:
                # Best effort: a pair that cannot be tested is reported as
                # NaN rather than aborting the remaining comparisons.
                p = np.nan

            ## Store p value
            # format() instead of '+' so non-string labels also work
            pvals['{}_vs_{}'.format(g1, g2)] = p
    return pvals