Python scipy.stats 模块,ttest_ind() 实例源码

我们从Python开源项目中,提取了以下27个代码示例,用于说明如何使用scipy.stats.ttest_ind()

项目:pscore_match    作者:kellieotto    | 项目源码 | 文件源码
def t_test(covariates, groups):
    """
    Two sample t test for the distribution of treatment and control covariates

    Every column of ``covariates`` is tested separately with
    ``ttest_ind`` (called with its default arguments), comparing the rows
    where ``groups == 1`` against the rows where ``groups == 0``.

    Parameters
    ----------
    covariates : DataFrame
        Dataframe with one covariate per column.
        If matches are with replacement, then duplicates should be
        included as additional rows.
    groups : array-like
        treatment assignments, must be 2 groups coded as 1 and 0.
        Matched to the rows of ``covariates`` by position.

    Returns
    -------
    ndarray
        p-values, one for each column in covariates, in column order
    """
    # A plain list compared with a scalar yields a single bool, not a mask,
    # so normalise to an array before building the two row selectors.
    labels = np.asarray(groups)
    treated = labels == 1
    control = labels == 0

    pvalues = np.zeros(len(covariates.columns))
    for j, colname in enumerate(covariates.columns):
        var = covariates[colname]
        pvalues[j] = ttest_ind(var[treated], var[control]).pvalue
    return pvalues
项目:pysciencedock    作者:Kitware    | 项目源码 | 文件源码
def volcano(data):
    """Build the per-column table behind a volcano plot.

    ``data`` is split into two groups (A and B) on the second level of its
    row index, which must hold exactly two values. For every column the
    result holds the t statistic and p-value of ``ttest_ind(A, B)``,
    ``-log10(p)``, the fold change ``mean(B) / mean(A)`` and its log2.

    Returns a DataFrame with one row per column of ``data``.

    Raises ``Exception`` when the second index level does not have exactly
    two values.
    """
    groups = data.index.levels[1]
    if len(groups) != 2:
        raise Exception('Volcano requires secondary index with two values')

    first, second = groups
    sample_a = data.xs(first, level=1)
    sample_b = data.xs(second, level=1)

    # Fold change is taken column by column: mean of B over mean of A.
    fold = sample_b.mean(axis=0).div(sample_a.mean(axis=0))

    t_stat, p_val = ttest_ind(sample_a, sample_b)

    rows = [t_stat, p_val, -np.log10(p_val), fold, np.log2(fold)]
    labels = ['t', 'p', '-log10(p)', 'foldchange', 'log2(foldchange)']
    table = pd.DataFrame(rows, columns=data.columns, index=labels)

    return table.transpose()
项目:pysciencedock    作者:Kitware    | 项目源码 | 文件源码
def ttest(data):
    """Run a column-wise two-sample t-test on a two-group frame.

    ``data`` is split into two groups on the second level of its row index,
    which must hold exactly two values. For every column the result holds
    the t statistic, the p-value and ``-log10(p)`` of ``ttest_ind`` applied
    to the two groups.

    Returns a DataFrame with one row per column of ``data``.

    Raises ``Exception`` when the second index level does not have exactly
    two values.
    """
    groups = data.index.levels[1]
    if len(groups) != 2:
        raise Exception('T-test requires secondary index with two values')

    first, second = groups
    t_stat, p_val = ttest_ind(data.xs(first, level=1), data.xs(second, level=1))

    rows = [t_stat, p_val, -np.log10(p_val)]
    table = pd.DataFrame(rows, columns=data.columns, index=['t', 'p', '-log10(p)'])

    return table.transpose()
项目:physalia    作者:TQRG    | 项目源码 | 文件源码
def hypothesis_test(sample_a, sample_b):
    """Compare the energy consumption of two samples with Welch's t-test.

    The ``energy_consumption`` attribute of every measurement is collected
    per sample and the two resulting lists are passed to ``ttest_ind`` with
    ``equal_var=False``.

    Args:
        sample_a (list of Measurement): measurements of sample a
        sample_b (list of Measurement): measurements of sample b

    Returns:
        The result of ``ttest_ind``: the t-statistic followed by the
        two-tailed p-value.

    """
    energy_a = [item.energy_consumption for item in sample_a]
    energy_b = [item.energy_consumption for item in sample_b]
    return ttest_ind(energy_a, energy_b, equal_var=False)
项目:ECNN    作者:alazareva    | 项目源码 | 文件源码
def update_layer_value_probs(bottom, top, cls):
    """Nudge the layer-size distribution parameters stored on ``cls``.

    The average layer size of every model that has layers (according to
    ``cls.has_layers``) is collected for both groups. When both groups are
    non-empty and a t-test between them gives p < 0.05, ``cls.beta`` is
    increased by 2 if the top group has the smaller mean, otherwise
    ``cls.alpha`` is increased by 2. Progress is printed to stdout.

    Keyword arguments:
    bottom -- the low performing models
    top -- the high performing models
    cls -- the layer utilities class (provides ``get_average_layer_size``,
           ``has_layers``, ``alpha`` and ``beta``)
    """

    def average_sizes(models):
        return [cls.get_average_layer_size(model) for model in models if cls.has_layers(model)]

    top = average_sizes(top)
    bottom = average_sizes(bottom)
    if not top or not bottom:
        return

    _, p = stats.ttest_ind(top, bottom)
    if not p < 0.05:
        return

    top_mean = np.mean(top)
    bottom_mean = np.mean(bottom)
    print('adjusting parama', cls)
    if top_mean < bottom_mean:
        cls.beta += 2
    else:
        cls.alpha += 2
    print(cls.beta, cls.alpha)
项目:lol_champ_recs    作者:Cpierse    | 项目源码 | 文件源码
def quick_t_test(N_players,champ1_data,champ2_data):
    """Welch's t-test on two count lists, each zero-padded to ``N_players``.

    Each input is extended with zeros until it has ``N_players`` entries
    (inputs that are already that long or longer are used as they are) and
    the padded lists are compared with ``ttest_ind(..., equal_var=False)``.

    The padding is done on copies: the caller's lists are left untouched,
    so the same data can be passed to several comparisons.

    Returns the ``ttest_ind`` result (statistic, p-value).
    """
    padded_1 = list(champ1_data) + [0] * (N_players - len(champ1_data))
    padded_2 = list(champ2_data) + [0] * (N_players - len(champ2_data))
    return ttest_ind(padded_1, padded_2, equal_var=False)

#t_score = quick_t_test(recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['N'], 
#             sliding_count_recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['DATA'][str(champ2id['Garen'])],
#             sliding_count_recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['DATA'][str(champ2id['Nasus'])])
#
#
#t_score = quick_t_test(recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['N'], 
#             sliding_count_recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['DATA'][str(champ2id['Garen'])],
#             sliding_count_recs[tier]['TOP'][str(champ2id['Darius'])]['TOP']['DATA'][str(champ2id['Olaf'])])
#


# More detailed t-test function:
项目:ipanda    作者:varnivey    | 项目源码 | 文件源码
def calculate_group_ttest_pvals(self):
        '''Compute one group t-test p-value per gene and store them.

        For every gene index ``i`` the ``i``-th columns of ``self.expr_t``
        and ``self.expr_n`` are each passed through ``randomize_samples``
        and compared with ``ttest_func``. P-values equal to zero are
        replaced by the smallest positive normalized float so that later
        code never sees an exact zero. The result is stored as a list in
        ``self.pval_list``.
        '''

        t_matrix = self.expr_t
        n_matrix = self.expr_n

        collected = []
        for gene_idx in range(len(self.genes)):
            t_column = randomize_samples(t_matrix[:, gene_idx])
            n_column = randomize_samples(n_matrix[:, gene_idx])
            _, gene_pval = ttest_func(t_column, n_column)
            collected.append(gene_pval)

        pvals = np.array(collected)
        # Exact zeros are swapped for the smallest positive float.
        zero_mask = pvals == 0
        pvals[zero_mask] = sys.float_info.min

        self.pval_list = list(pvals)
项目:fitr    作者:abrahamnunes    | 项目源码 | 文件源码
def param_ttest(X, Y):
    """
    Column-by-column two-sample t-test between two parameter matrices
    (e.g. actual versus estimated parameters).

    Parameters
    ----------
    X : ndarray(shape=(n_subjects, nparams))
    Y : ndarray(shape=(n_subjects, nparams))

    Returns
    -------
    res : ndarray(shape=(n_params, 2))
        Row ``j`` holds (t-statistic, p-value) of ``ttest_ind`` applied to
        column ``j`` of ``X`` and column ``j`` of ``Y``.

    Notes
    -----
    The number of parameters is taken from ``X``; ``Y`` must provide at
    least as many columns.
    """
    n_columns = np.shape(X)[1]
    res = np.zeros((n_columns, 2))

    for col in range(n_columns):
        t_stat, p_val = ttest_ind(X[:, col], Y[:, col])
        res[col, 0] = t_stat
        res[col, 1] = p_val

    return res
项目:Behavior    作者:danustc    | 项目源码 | 文件源码
def session_ttest(gcount, nlist, nsess = 4 ):
    """Pairwise independent t-tests between the sessions listed in ``nlist``.

    The sample for session label ``n`` is ``gcount[n::nsess, 0]``, i.e.
    every ``nsess``-th row of the first column starting at row ``n``.

    Returns two ``len(nlist) x len(nlist)`` matrices ``(t_val, p_val)``.
    Both are filled symmetrically (the t value is copied unchanged to the
    mirrored cell); the diagonal is 0 for ``t_val`` and 1 for ``p_val``.
    """
    n_labels = len(nlist)
    t_val = np.zeros([n_labels, n_labels])
    p_val = np.identity(n_labels)

    samples = [gcount[label::nsess, 0] for label in nlist]

    for row in range(n_labels):
        for col in range(row):
            t_stat, p_stat = stats.ttest_ind(samples[row], samples[col])
            t_val[row, col] = t_val[col, row] = t_stat
            p_val[row, col] = p_val[col, row] = p_stat

    return t_val, p_val
项目:dc_stat_think    作者:justinbois    | 项目源码 | 文件源码
def test_studentized_diff_of_means(data_1, data_2):
    """Check ``dcst.studentized_diff_of_means`` against Welch's t statistic.

    When both samples have zero variance the function is expected to
    return NaN; otherwise it must match the statistic reported by
    ``st.ttest_ind(..., equal_var=False)``.
    """
    if not (np.var(data_1) == np.var(data_2) == 0):
        t, _ = st.ttest_ind(data_1, data_2, equal_var=False)
        assert np.isclose(dcst.studentized_diff_of_means(data_1, data_2), t)
        return
    assert np.isnan(dcst.studentized_diff_of_means(data_1, data_2))
项目:noisyopt    作者:andim    | 项目源码 | 文件源码
def test(self, xtest, x, type_='smaller', alpha=0.05):
        """
        Parameters
        ----------
        type_: in ['smaller', 'equality']
            type of comparison to perform
        alpha: float
            significance level
        """
        # call function to make sure it has been evaluated a sufficient number of times
        if type_ not in ['smaller', 'equality']:
            raise NotImplementedError(type_)
        ftest, ftestse = self(xtest)
        f, fse = self(x)
        # get function values
        fxtest = np.array(self.cache[tuple(xtest)])
        fx = np.array(self.cache[tuple(x)])
        if np.mean(fxtest-fx) == 0.0:
            if type_ == 'equality':
                return True
            if type_ == 'smaller':
                return False
        if self.paired:
            # if values are paired then test on distribution of differences
            statistic, pvalue = stats.ttest_rel(fxtest, fx, axis=None)
        else:
            statistic, pvalue = stats.ttest_ind(fxtest, fx, equal_var=False, axis=None)
        if type_ == 'smaller':
            # if paired then df=N-1, else df=N1+N2-2=2*N-2 
            df = self.N-1 if self.paired else 2*self.N-2
            pvalue = stats.t.cdf(statistic, df) 
            # return true if null hypothesis rejected
            return pvalue < alpha
        if type_ == 'equality':
            # return true if null hypothesis not rejected
            return pvalue > alpha
项目:xdesign    作者:tomography    | 项目源码 | 文件源码
def compute_background_ttest(image, masks):
    """Welch's t-test between the background pixels and all other phases.

    Parameters
    -------------
    image : ndarray

    masks : list of ndarrays
        ``masks[0]`` selects the background; the remaining masks are OR-ed
        together to select the pixels it is compared against. A pixel is
        selected wherever its mask value is greater than zero. The
        non-background mask is not generated automatically so that a
        caller can compare just two phases.

    Returns
    ----------
    tstat : scalar
    pvalue : scalar
    """
    background_pixels = image[masks[0] > 0]

    # Union of every non-background mask.
    selected = False
    for mask in masks[1:]:
        selected = np.logical_or(selected, mask > 0)
    other_pixels = image[selected]

    tstat, pvalue = ttest_ind(background_pixels, other_pixels,
                              axis=None, equal_var=False)
    return tstat, pvalue
项目:physalia    作者:TQRG    | 项目源码 | 文件源码
def pairwise_welchs_ttest(*samples, **options):
    """Write a lower-triangular table of pairwise Welch's t-tests.

    Every sample is compared with each sample that precedes it; the
    remaining cells of a row (diagonal included) are filled with "--".

    Options:
        names: row/column labels; when missing they are derived from the
            ``use_case`` of the first item of each sample.
        sort: when truthy, samples are ordered by their name.
        table_fmt: format handed to ``tabulate`` (default "grid").
        out: writable stream (default ``sys.stdout``).
    """
    names = options.get("names")
    sort = options.get("sort")
    table_fmt = options.get("table_fmt", "grid")
    out = options.get("out", sys.stdout)

    if not names:
        names = []
        for sample in samples:
            names.append(sample and sample[0].use_case.title().replace('_', ' '))

    if sort:
        names, samples = zip(*sorted(zip(names, samples)))

    arrays = [np.array(sample, dtype='float') for sample in samples]
    total = len(arrays)

    table = []
    for position, current in enumerate(arrays):
        cells = [
            _format_test_result(ttest_ind(current, earlier, equal_var=False))
            for earlier in arrays[:position]
        ]
        cells += ["--"] * (total - position)
        table.append(cells)

    rendered = tabulate(table, headers=names, showindex=names, tablefmt=table_fmt)
    out.write(rendered)
    out.write("\n")
项目:TFG    作者:alu0100505078    | 项目源码 | 文件源码
def welchTest(nAlgorithms,hyperVolumeList):
    """Run a two-sample t-test for every pair of algorithms.

    The equal-variance (Student) form of ``stats.ttest_ind`` is used when
    ``all_same`` reports that all means are the same, or failing that, that
    all variances are the same; otherwise Welch's unequal-variance form is
    used. The choice is made once and applied to every pair.

    Parameters
    ----------
    nAlgorithms : int
        number of algorithms; entries ``0 .. nAlgorithms-1`` of
        ``hyperVolumeList`` are compared
    hyperVolumeList : sequence
        one sequence of hypervolume values per algorithm

    Returns
    -------
    list
        ``ttest_ind`` results for the pairs (i, j) with i < j, ordered by
        i and then by j
    """
    # first compute the means and variances
    mean = calculeMean(hyperVolumeList)
    variance = calculeVariance(hyperVolumeList)

    # Variances are only inspected when the means are not all identical.
    # NOTE(review): assumes all_same returns a bool - confirm.
    equal_var = bool(all_same(mean) or all_same(variance))

    welch = []
    for i in range(nAlgorithms):
        algorithm = np.array(hyperVolumeList[i])
        for j in range(i + 1, nAlgorithms):
            algorithmCompare = np.array(hyperVolumeList[j])
            welch.append(
                stats.ttest_ind(algorithm, algorithmCompare, equal_var=equal_var))
    return welch
项目:scikit-mdr    作者:EpistasisLab    | 项目源码 | 文件源码
def score(self, features, targets):
        """Score the fitted Continuous MDR model with an absolute t-statistic.

        The trait values stored in ``self.mdr_matrix_values`` are pooled
        into two groups according to the label that ``self.feature_map``
        assigns to each feature instance (label 0 versus anything else),
        and the two pools are compared with ``ttest_ind``.

        Parameters
        ----------
        features: array-like {n_samples, n_features}
            Feature matrix to predict from (not read by this method)
        targets: array-like {n_samples}
            List of true target values (not read by this method)

        Returns
        -------
        quality_score: float
            Absolute value of the t-statistic between the two groups

        Raises
        ------
        ValueError
            if the model has not been fit (``self.feature_map`` is None)

        """
        if self.feature_map is None:
            raise ValueError('The Continuous MDR model must be fit before score() can be called.')

        low_group, high_group = [], []
        for instance, label in self.feature_map.items():
            bucket = low_group if label == 0 else high_group
            bucket.extend(self.mdr_matrix_values[instance])

        result = ttest_ind(low_group, high_group)
        return abs(result.statistic)
项目:TPs    作者:DataMiningP7    | 项目源码 | 文件源码
def test_word_means(X, y, word_index):
    """ Performs a two-means t-test on the tf-idf values of a given word
     represented by its index in the matrix X. The test checks whether the word
     is over-represented in spammy messages and returns its p-value. The
     smaller the p-value, the more over-represented the word is within spams
     compared to hams.


    Args:
        X: the TF-IDF matrix where each line represents a document and each
           column represents a word, typically obtained by running
           transform_text().
        y: a binary vector where the i-th value indicates whether the i-th
           document is a spam, typically obtained by running transform_text().
        word_index: an int representing a column number in X.
    Returns:
        A double that corresponds to the p-value of the test (the probability
        that the word is NOT over-represented in the spams).
   """
    # get a full matrice instead of a sparse one
    X = X.todense()

    x0 = X[ y == 0, word_index ]
    x1 = X[ y == 1, word_index ]

    #  t < 0 means x0 < x1
    t, p = ttest_ind(x0, x1)
    return p
项目:microbiomeHD    作者:cduvallet    | 项目源码 | 文件源码
def get_layered_pvals(df, groupcol, valuecol, subset_by,
                      pval_method='kruskalwallis'):
    """
    Compute pairwise p-values within each subset of the data.

    The dataframe is grouped by ``subset_by`` and ``get_all_pvals()`` is
    run on every group separately, i.e. this is a thin wrapper for
    groupby(subset_by) + get_all_pvals().

    Parameters
    ----------
    df : pandas dataframe
        tidy dataframe with labels in `groupcol` and values in `valuecol`
    groupcol, valuecol : str
        columns in df
    subset_by : str
        column to group by
    pval_method : str {'kruskalwallis', 'ranksums', 'wilcoxon', 'ttest_ind'}
        statistical method for comparison. Default is 'kruskalwallis'

    Returns
    -------
    pvals : dict
        multi-level dictionary, with outside keys as the unique values in
        df[subset_by] and the inner values as in get_all_pvals()
    """
    return {
        subset: get_all_pvals(subset_df, groupcol, valuecol, method=pval_method)
        for subset, subset_df in df.groupby(subset_by)
    }
项目:lol_champ_recs    作者:Cpierse    | 项目源码 | 文件源码
def t_test_ex(role1,champ1,single_counts=False):
    """Print Welch's t-test p-values between neighbouring recommendations.

    For every second role recorded for ``champ1`` in ``role1`` (the 'TOTAL'
    and 'DATA' entries are skipped) and for each of the recommendations at
    positions 1-3, the recommendation is compared with the three that
    follow it. Both count lists are zero-padded to the ``'N'`` value of the
    role; with ``single_counts`` every positive count is clipped to 1
    first. One line of three p-values is printed per position.

    Reads the module-level ``recs``, ``tier``, ``sliding_count_recs``,
    ``champ2id`` and ``id2champ``.
    """
    champ1 = str(champ2id[champ1])
    for role2 in recs[tier][role1][champ1]:
        if role2 in ('TOTAL', 'DATA'):
            continue
        role_recs = recs[tier][role1][champ1][role2]
        for idx in range(1, 4):
            values = []
            for other_idx in range(idx + 1, idx + 4):
                # Get ids from recs:
                champ2_1 = champ2id[role_recs[idx]['champ']]
                champ2_2 = champ2id[role_recs[other_idx]['champ']]
                # Get data:
                N = role_recs['N']
                counts = sliding_count_recs[tier][role1][champ1][role2]['DATA']
                padded = [
                    np.array(counts[champ] + [0] * (N - len(counts[champ])))
                    for champ in (champ2_1, champ2_2)
                ]
                if single_counts:
                    for sample in padded:
                        sample[sample > 0] = 1
                pvalue = ttest_ind(padded[0], padded[1], equal_var=False)[1]
                values.append(str(pvalue))
            print( role2 + ' ' + id2champ[champ2_1] + ' p-values: ' + ', '.join(values))
        print('-----------------------------------------------------------------------------')

#t_test_ex('TOP','Darius')
#t_test_ex('TOP','Darius',single_counts=True)
#
#t_test_ex('MID','Lux')
#t_test_ex('MID','Lux',single_counts=True)
#
#t_test_ex('ADC','Caitlyn')
#t_test_ex('ADC','Caitlyn',single_counts=True)
#
#
#
项目:ABtests    作者:leodema    | 项目源码 | 文件源码
def test_analysis(self):
        """TtestIndip on arrays must reproduce scipy's Welch p-value (9 decimals)."""
        sample_a = np.array(self.a)
        sample_b = np.array(self.b)
        p_value = TtestIndip(sample_a, sample_b, equal_var=False).p_value

        reference = ttest_ind(self.a, self.b, equal_var=False)
        p_value_expected = reference[1]

        self.assertEqual(round(p_value_expected, 9), round(p_value, 9))
项目:ABtests    作者:leodema    | 项目源码 | 文件源码
def test_analysis_list(self):
        """TtestIndip on plain lists must reproduce scipy's Welch p-value (9 decimals)."""
        analysis = TtestIndip(self.a, self.b, False)
        p_value = analysis.p_value
        analysis.report()

        reference = ttest_ind(self.a, self.b, equal_var=False)
        p_value_expected = reference[1]

        self.assertEqual(round(p_value_expected, 9), round(p_value, 9))
项目:mrqap-python    作者:lisette-espin    | 项目源码 | 文件源码
def stats(self, x, y):
        """Print Pearson correlation, z-test and t-test for two matrices.

        When ``self.diagonal`` is falsy the main-diagonal entries (flat
        positions ``i * (n + 1)``) are removed from both inputs first,
        which also flattens them; otherwise the inputs are used as given.
        """
        if self.diagonal:
            first, second = x, y
        else:
            first = np.delete(x, [i*(x.shape[0]+1) for i in range(x.shape[0])])
            second = np.delete(y, [i*(y.shape[0]+1) for i in range(y.shape[0])])

        p = np.corrcoef(first, second)
        utils.printf('Pearson\'s correlation:\n{}'.format(p))
        utils.printf('Z-Test:{}'.format(ztest(first, second)))
        utils.printf('T-Test:{}'.format(ttest_ind(first, second)))
项目:nip-convnet    作者:gangchill    | 项目源码 | 文件源码
def create_ckplus_boxplot():
    """Report and plot pre-trained vs. randomly initialised accuracy on CK+.

    Uses the hard-coded accuracy lists below: prints the average
    improvement (in percentage points) and the Welch's t-test p-value
    between the two runs, then hands the data to ``visualize_boxplot``.
    """

    ## #### ##
    # CKPLUS #
    ## #### ##

    print('\nCKPLUS')

    # Earlier 5-trial results, kept for reference:
    # ckplus_full_pre_trained = [0.7323, 0.7374, 0.7475, 0.7374, 0.7273] 
    # ckplus_full_random_init = [0.6919, 0.7172, 0.6869, 0.7172, 0.7071]

    ckplus_full_pre_trained = [0.7475, 0.7374, 0.7273, 0.7525, 0.7424, 0.7323, 0.7374, 0.7424, 0.7374, 0.7374]
    ckplus_full_random_init = [0.6970, 0.6818, 0.7020, 0.6717, 0.7020, 0.7020, 0.6818, 0.7020, 0.7121, 0.6869]
    ckplus_full = [ckplus_full_random_init, ckplus_full_pre_trained]
    ckplus_full_trial_count = min(len(ckplus_full_pre_trained), len(ckplus_full_random_init))

    _, ckplus_pvalue = stats.ttest_ind(ckplus_full_pre_trained, ckplus_full_random_init, equal_var = False)

    avg_improvement = (np.mean(ckplus_full_pre_trained) - np.mean(ckplus_full_random_init)) * 100
    print('--> CKPLUS <--')
    print('avg improvement: {}'.format(avg_improvement))
    # The label used to read 'cifar_1k', a leftover from another dataset's report.
    print('ckplus p-value: {}'.format(ckplus_pvalue))

    ckplus_ylims = 0.6, 0.8

    data            = [ckplus_full]
    trial_counts    = [ckplus_full_trial_count]

    visualize_boxplot(data, 'ckplus', [0.696], trial_counts, ckplus_ylims)
项目:stock-predict-by-RNN-LSTM    作者:blockchain99    | 项目源码 | 文件源码
def compare(self):
        """Print a pairwise t-test comparison of the models' error samples.

        Every pair (i, j) with i < j among the first ``self.count`` entries
        of ``self.errors`` is compared with ``stats.ttest_ind``. When
        p < 0.05 the model with the lower average error is reported as
        significantly better, followed by both averages; otherwise the
        pair is reported as not significantly different.
        """
        for i in range(self.count):
            for j in range(i + 1, self.count):
                tst, pvalue = stats.ttest_ind(self.errors[i], self.errors[j])
                if pvalue < 0.05:
                    avg_i = np.average(self.errors[i])
                    avg_j = np.average(self.errors[j])
                    # Significance alone says nothing about direction:
                    # the better model is the one with the lower error.
                    if avg_i <= avg_j:
                        better, worse = self.names[i], self.names[j]
                    else:
                        better, worse = self.names[j], self.names[i]
                    print("{0} is significantly better than {1}".format(better, worse))
                    print("{0} avg err = {1}, {2} avg err = {3}".format(
                            self.names[i], avg_i,
                            self.names[j], avg_j
                    ))
                else:
                    print("{0} and {1} are not significantly different".format(self.names[i], self.names[j]))
项目:FLASH    作者:yuyuz    | 项目源码 | 文件源码
def _leading_version_numbers(version):
    """Return the leading numeric components of a dotted version string as ints."""
    numbers = []
    for piece in version.split("."):
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
        if digits != piece:
            # e.g. "0rc1": keep the 0 and ignore the suffix and what follows
            break
    return tuple(numbers)


def main(pkl_list, name_list, cut=getattr(sys, "maxint", sys.maxsize)):
    """Print unpaired t-test results and summary statistics per experiment.

    Loads the pickles through ``plot_util``, then writes to stdout:
    the number of experiments per key, a standard (equal-variance) t-test
    for every pair of keys on both the raw and the 3-decimal rounded best
    values, Welch's t-test for the same pairs when the installed scipy is
    at least 0.11, and mean/min/max/std of the best values and of the
    trials needed.

    ``cut`` is passed on to ``plot_util.get_best_dict``; its default is the
    largest native integer (``sys.maxint`` where it exists, else
    ``sys.maxsize``).
    """
    pickles = plot_util.load_pickles(name_list, pkl_list)
    best_dict, idx_dict, keys = plot_util.get_best_dict(name_list, pickles,
                                                       cut=cut)

    for k in keys:
        sys.stdout.write("%10s: %s experiment(s)\n" % (k, len(best_dict[k])))

    # Welch's test is only attempted on scipy >= 0.11. Pre-release suffixes
    # such as "rc1" are ignored instead of crashing int().
    welch_available = _leading_version_numbers(scipy.__version__) >= (0, 11, 0)

    sys.stdout.write("Unpaired t-tests-----------------------------------------------------\n")
    # TODO: replace by itertools
    for idx, k in enumerate(keys):
        for j in keys[idx+1:]:
            t_true, p_true = stats.ttest_ind(best_dict[k], best_dict[j])
            rounded_t_true, rounded_p_true = stats.ttest_ind(numpy.round(best_dict[k], 3),
                                                             numpy.round(best_dict[j], 3))

            sys.stdout.write("%10s vs %10s\n" % (k, j))
            sys.stdout.write("Standard independent 2 sample test, equal population variance\n")
            sys.stdout.write(" "*24 + "  T: %10.5e, p-value: %10.5e (%5.3f%%) \n" %
                            (t_true, p_true, p_true*100))
            sys.stdout.write("Rounded:                ")
            sys.stdout.write("  T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                            (rounded_t_true, rounded_p_true, rounded_p_true*100))
            if welch_available:
                t_false, p_false = stats.ttest_ind(best_dict[k], best_dict[j], equal_var=False)
                rounded_t_false, rounded_p_false = stats.ttest_ind(numpy.round(best_dict[k], 3),
                                                                   numpy.round(best_dict[j], 3),
                                                                   equal_var=False)
                sys.stdout.write("Welch's t-test, no equal population variance\n")
                sys.stdout.write(" "*24)
                sys.stdout.write(": T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                                (t_false, p_false, p_false*100))
                sys.stdout.write("Rounded:                ")
                sys.stdout.write(": T: %10.5e, p-value: %10.5e (%5.3f%%)\n" %
                                (rounded_t_false, rounded_p_false, rounded_p_false*100))
            sys.stdout.write("\n")

    sys.stdout.write("Best Value-----------------------------------------------------------\n")
    for k in keys:
        sys.stdout.write("%10s: %10.5f (min: %10.5f, max: %10.5f, std: %5.3f)\n" %
                        (k, float(numpy.mean(best_dict[k])), float(numpy.min(best_dict[k])),
                         numpy.max(best_dict[k]), float(numpy.std(best_dict[k]))))

    sys.stdout.write("Needed Trials--------------------------------------------------------\n")
    for k in keys:
        sys.stdout.write("%10s: %10.5f (min: %10.5f, max: %10.5f, std: %5.3f)\n" %
                        (k, float(numpy.mean(idx_dict[k])), float(numpy.min(idx_dict[k])),
                         numpy.max(idx_dict[k]), float(numpy.std(idx_dict[k]))))

    sys.stdout.write("------------------------------------------------------------------------\n")
项目:microbiomeHD    作者:cduvallet    | 项目源码 | 文件源码
def get_all_pvals(df, groupcol, valuecol, method='kruskalwallis'):
    """
    Returns pairwise p-values between all groups in the column `groupcol`.

    Groups are taken in order of first appearance in ``df[groupcol]``, so
    the key for a pair is always '<earlier group>_vs_<later group>' and
    does not change between runs. If the chosen test raises for a pair,
    that pair's p-value is NaN.

    Parameters
    ----------
    df : pandas dataframe
        tidy dataframe with labels in `groupcol` and values in `valuecol`
    groupcol, valuecol : str
        columns in df
    method : str {'kruskalwallis', 'ranksums', 'wilcoxon', 'ttest_ind'}
        statistical method for comparison. Default is 'kruskalwallis'.
        'wilcoxon' is treated as 'ranksums'; any unrecognised value falls
        back to 'kruskalwallis'.

    Returns
    -------
    pvals : dict
        dictionary with 'group1_vs_group2' as the keys and p-value as the values
    """

    ## Pick the test once; it does not depend on the pair
    if method == 'ranksums' or method == 'wilcoxon':
        pfun = ranksums
    elif method == 'ttest_ind':
        pfun = ttest_ind
    else:
        pfun = kruskalwallis

    pvals = {}

    ## Get all pairwise combinations
    grps = list(df[groupcol].unique())
    for idx, g1 in enumerate(grps):
        x = df[df[groupcol] == g1][valuecol]
        for g2 in grps[idx + 1:]:
            y = df[df[groupcol] == g2][valuecol]

            ## Calculate p value (best effort: a failing test gives NaN,
            ## but KeyboardInterrupt/SystemExit are no longer swallowed)
            try:
                _, p = pfun(x, y)
            except Exception:
                p = np.nan

            ## Store p value; format() also accepts non-string labels
            pvals['{}_vs_{}'.format(g1, g2)] = p
    return pvals
项目:Smart-Meter-Experiment-ML-Revisited    作者:felgueres    | 项目源码 | 文件源码
def AB(k_model, clustersDict):

    '''
    Runs a Control-vs-Trial (A/B) comparison for every cluster and every
    time-of-use window, plotting and printing the result.

    For each cluster the rows whose ``Residential_Tariff`` is one of
    E/A/B/C/D are kept; tariff 'E' is relabelled 'Control' and every other
    tariff 'Trial'. The last three columns are dropped and the frame is
    transposed, then for each window the rows ``start .. end`` (inclusive)
    are summed per user. A KDE plot of both groups is shown and the Welch's
    t-test p-value, the statistical power and the relative change of the
    mean are printed.

    Parameters
    ----------

    k_model : sklearn.KMEANS
        Trained Kmeans model. Not used by this function.

    clustersDict : dict
        Dictionary containing DataFrames for all clusters. Keys are used
        as zero-based cluster numbers in titles and messages.
        NOTE(review): presumably the remaining columns are hourly readings,
        so the windows are hours of the day - confirm against the caller.

    Returns
    -------

    None
        Figures are displayed with ``plt.show()`` and results are printed.

    '''
    tariffs = ['E', 'A', 'B', 'C', 'D']

    timeofuse = {'day': [8,17], 'peak':[17,19], 'night': [0,8], 'day2':[19,24]}

    for cluster in clustersDict:

        df = clustersDict[cluster]
        # .loc replaces .ix (removed in pandas 1.0); the copy keeps the
        # relabelling below away from the caller's frame.
        df = df.loc[df.Residential_Tariff.isin(tariffs)].copy()
        df['Residential_Tariff'] = df.Residential_Tariff.apply(lambda x: 'Control' if x == 'E' else 'Trial')

        _df_Control = df.loc[df.Residential_Tariff == 'Control'].iloc[:,:-3].T
        _df_Trial = df.loc[df.Residential_Tariff == 'Trial'].iloc[:,:-3].T

        for period in timeofuse:

            first_row, last_row = timeofuse[period]
            control = _df_Control.iloc[first_row:last_row+1,:].sum()
            trial = _df_Trial.iloc[first_row:last_row+1,:].sum()

            fig = plt.figure()
            ax_ = fig.add_subplot(1,1,1)

            control.plot(kind = 'kde', ax= ax_, alpha = 0.5 )
            trial.plot(kind = 'kde', ax=ax_, alpha = 0.5)

            ax_.set_title('Cluster %d: %s' % (cluster+1, period))
            ax_.set_xlim((1,5))
            ax_.set_ylim([0, 0.6])
            ax_.set_xlabel('Consumption (kWh)')

            plt.show()

            # Joined into one string so the line prints identically under
            # the Python 2 print statement and the Python 3 print function.
            report = [
                'Cluster %d, %s p-value:' % ((cluster +1), period),
                ttest_ind(control, trial, equal_var=False)[1],
                'power: ',
                stat_power(control, trial),
                'magnitude: ',
                np.mean(trial)/np.mean(control) -1,
            ]
            print(' '.join(str(part) for part in report))
项目:redash_client    作者:mozilla    | 项目源码 | 文件源码
def _power_and_ttest(self, control_vals, exp_vals):
    """Summarise an experiment-vs-control comparison.

    Computes the statistical power (via ``smp.TTestIndPower``; left at 0
    when the control mean or the pooled standard deviation is zero) and
    the Welch's t-test p-value, then labels the result "Positive" or
    "Negative" when the p-value is at most ``self.ALPHA_ERROR`` and the
    experiment mean is above or below the control mean, else "Neutral".

    Returns a dict with the keys ``power``, ``p_val``, ``control_mean``,
    ``mean_diff``, ``percent_diff`` and ``significance``. ``p_val`` is the
    empty string when the t-test yields no usable (NaN) p-value, in which
    case the significance is "Neutral".
    """
    control_mean = statistics.mean(control_vals)
    control_std = statistics.stdev(control_vals)
    exp_mean = statistics.mean(exp_vals)
    exp_std = statistics.stdev(exp_vals)

    pooled_stddev = self._compute_pooled_stddev(
        control_std, exp_std, control_vals, exp_vals)

    power = 0
    percent_diff = None
    if control_mean != 0 and pooled_stddev != 0:
        percent_diff = (control_mean - exp_mean) / float(control_mean)
        effect_size = (abs(percent_diff) * float(control_mean)) / float(pooled_stddev)
        power = smp.TTestIndPower().solve_power(
            effect_size,
            nobs1=len(control_vals),
            ratio=len(exp_vals) / float(len(control_vals)),
            alpha=self.ALPHA_ERROR, alternative='two-sided')

    ttest_result = stats.ttest_ind(control_vals, exp_vals, equal_var=False)
    p_val = ""
    if len(ttest_result) >= 2 and not math.isnan(ttest_result[1]):
        p_val = ttest_result[1]

    mean_diff = exp_mean - control_mean

    # p_val stays "" when the test produced NaN; comparing a str with a
    # float raises TypeError on Python 3, so check for that first.
    is_significant = p_val != "" and p_val <= self.ALPHA_ERROR

    if is_significant and mean_diff < 0:
        significance = "Negative"
    elif is_significant and mean_diff > 0:
        significance = "Positive"
    else:
        significance = "Neutral"

    return {
        "power": power,
        "p_val": p_val,
        "control_mean": control_mean,
        "mean_diff": mean_diff,
        "percent_diff": 0 if percent_diff is None else percent_diff * -100,
        "significance": significance,
    }