Python scipy.stats 模块,chi2_contingency() 实例源码

我们从Python开源项目中,提取了以下9个代码示例,用于说明如何使用scipy.stats.chi2_contingency()

项目:tableone    作者:tompollard    | 项目源码 | 文件源码
def _p_test(self,v,grouped_data,is_continuous,is_categorical,
            is_normal,min_observed,catlevels,
            pval=np.nan,ptest='Not tested'):
        """
        Compute a p-value for one variable and name the test that produced it.

        Parameters
        ----------
        v : label of the variable; only used in warning messages.
        grouped_data : for continuous variables, a sequence of per-group
            samples (unpacked into the test); for categorical variables, a
            contingency table exposing ``.shape`` (e.g. a NumPy array).
        is_continuous, is_categorical : flags selecting the family of test.
        is_normal : for continuous variables, chooses one-way ANOVA (True)
            or Kruskal-Wallis (False).
        min_observed : when 0, no test is run and the defaults are returned.
        catlevels : not used by this method.
        pval, ptest : values returned when no test is run.

        Returns
        -------
        (pval, ptest) : the p-value and a human-readable test name.
        """

        # do not test if any sub-group has no observations
        if min_observed == 0:
            warnings.warn('No p-value was computed for {} due to the low number of observations.'.format(v))
            return pval,ptest

        if is_continuous:
            if is_normal:
                # normally distributed
                ptest = 'One-way ANOVA'
                test_stat, pval = stats.f_oneway(*grouped_data)
            else:
                # non-normally distributed
                ptest = 'Kruskal-Wallis'
                test_stat, pval = stats.kruskal(*grouped_data)
        elif is_categorical:
            # default to chi-squared
            ptest = 'Chi-squared'
            chi2, pval, dof, expected = stats.chi2_contingency(grouped_data)
            # if any expected cell counts are < 5, chi2 may not be valid
            # if this is a 2x2, switch to fisher exact
            if expected.min() < 5:
                if grouped_data.shape == (2,2):
                    # A doubled quote ('Fisher''s') is literal concatenation
                    # in Python and silently drops the apostrophe.
                    ptest = "Fisher's exact"
                    oddsratio, pval = stats.fisher_exact(grouped_data)
                else:
                    # The chi-squared p-value is still returned here, so the
                    # warning must not claim that nothing was computed.
                    ptest = 'Chi-squared (warning: expected count < 5)'
                    warnings.warn('The chi-squared p-value for {} may be unreliable because at least one expected cell count is below 5.'.format(v))

        return pval,ptest
项目:grasp    作者:textgain    | 项目源码 | 文件源码
def fsel(data=()): # (feature selection, using chi2)
    """ Returns a {feature: p-value} dict
        for the given set of (vector, label)-tuples.

        Each vector is iterated to obtain its features. For every feature a
        2 x len(labels) table (label counts without / with the feature) is
        passed to scipy's chi2_contingency; zero cells are replaced by 0.1
        so that the test does not reject the table.

        data may be any iterable, including a one-shot generator: it is
        consumed in a single pass.
    """
    from scipy.stats import chi2_contingency as chi2

    f1 = collections.defaultdict(float) # {label: count}
    f2 = collections.defaultdict(float) # {feature: count}
    f3 = collections.defaultdict(float) # {feature, label: count}
    # One pass only: iterating `data` twice would silently see nothing the
    # second time when `data` is an iterator.
    for v, label in data:
        f1[label] += 1
        for f in v:
            f2[f] += 1
            f3[f, label] += 1
    labels = list(f1)
    p = {}
    for f in f2:
        without_f = [f1[label] - f3.get((f, label), 0.0) or 0.1 for label in labels]
        with_f = [f3.get((f, label), 0.0) or 0.1 for label in labels]
        p[f] = chi2([without_f, with_f])[1]
    return p
项目:coquery    作者:gkunter    | 项目源码 | 文件源码
def _func(self, x, size, ext_size, width):
        """
        Return the log-likelihood (G) statistic of a 2x2 frequency table.

        The table is built from ``x.freq1`` / ``x.freq2`` in the first row and
        ``size - x.freq1 * width`` / ``ext_size - x.freq2 * width`` in the
        second row, then passed to ``stats.chi2_contingency`` with
        ``lambda_="log-likelihood"``.

        If scipy rejects the table with a ValueError, the message is printed
        and NaN is returned instead.
        """
        # `pandas.np` was removed in pandas 2.0, so NumPy is imported directly.
        import numpy as np

        obs = np.array(
            [[x.freq1, x.freq2],
             [size - x.freq1 * width, ext_size - x.freq2 * width]])
        try:
            tmp = stats.chi2_contingency(obs,
                                         lambda_="log-likelihood")
        except ValueError as e:
            print(e)
            return np.nan

        # First element of the result is the test statistic.
        return tmp[0]
项目:qiskit-sdk-py    作者:QISKit    | 项目源码 | 文件源码
def test_random_circuits(self):
        """
        Run every random circuit on two simulators and compare the outcomes.

        For each circuit from ``self.rqg`` the compiled QASM is run with 100
        shots and seed 1 on 'local_projectq_simulator' and on
        'local_qasm_simulator'. States whose count does not exceed shots/10
        are dropped, both runs must report the same set of states, and the
        chi-squared contingency p-value of the two count rows must be above
        0.01.
        """
        qasm_backend = qasm_simulator.QasmSimulator()
        for circuit in self.rqg.get_circuits(format='QuantumCircuit'):
            self.log.info(circuit.qasm())
            compiled_circuit = openquantumcompiler.compile(circuit.qasm())
            shots = 100
            min_cnts = int(shots / 10)

            job_pq = QuantumJob(compiled_circuit,
                                backend='local_projectq_simulator',
                                seed=1, shots=shots)
            job_py = QuantumJob(compiled_circuit,
                                backend='local_qasm_simulator',
                                seed=1, shots=shots)
            result_pq = pq_simulator.run(job_pq)
            result_py = qasm_backend.run(job_py)
            raw_pq = result_pq.get_counts(result_pq.get_names()[0])
            raw_py = result_py.get_counts(result_py.get_names()[0])

            # keep only the states observed more than min_cnts times
            counts_pq = dict((state, hits) for state, hits in raw_pq.items()
                             if hits > min_cnts)
            counts_py = dict((state, hits) for state, hits in raw_py.items()
                             if hits > min_cnts)
            self.log.info('local_projectq_simulator: ' + str(counts_pq))
            self.log.info('local_qasm_simulator: ' + str(counts_py))
            self.assertTrue(counts_pq.keys() == counts_py.keys())

            # contingency table: one row per simulator, one column per state
            states = counts_py.keys()
            row_pq = [counts_pq[state] for state in states]
            row_py = [counts_py[state] for state in states]
            ctable = numpy.array([row_pq, row_py])
            result = chi2_contingency(ctable)
            self.log.info('chi2_contingency: ' + str(result))
            with self.subTest():
                self.assertGreater(result[1], 0.01)
项目:chat    作者:cambridgeltl    | 项目源码 | 文件源码
def run_test(q1_pos, q2_pos, q1_neg,q2_neg):
    '''
    Run one significance test per 2x2 contingency table.

    The four arguments are parallel sequences; element i of each forms the
    table [[q1_pos[i], q2_pos[i]], [q1_neg[i], q2_neg[i]]]. The number of
    elements is the number of tests that are run. For each table either a
    Fisher exact test or a chi-squared test is run, depending on whether
    useFisherExact(obs) is true.

    Bonferroni correction is then applied to the p-values: each p-value is
    multiplied by the number of tests and capped at 1.0.

    Returns a list of two parallel lists: the adjusted p-values and the test
    values (the chi-squared statistic or the Fisher exact odds ratio).

    Raises ValueError if the four sequences differ in length.
    '''

    columns = [q1_pos, q2_pos, q1_neg, q2_neg]
    n = len(columns[0])
    if not all(len(x) == n for x in columns):
        raise ValueError("length of input lists must be of same length")

    pvalues = []
    test_values = []

    for a, b, c, d in zip(*columns):
        obs = np.array([[a, b], [c, d]])
        # Run the chosen test once and read both values from its result.
        if useFisherExact(obs):
            outcome = fisher_exact(obs)
        else:
            outcome = chi2_contingency(obs)
        test_values.append(outcome[0])
        pvalues.append(outcome[1])

    # Bonferroni: scale p-values UP by the number of tests (dividing would
    # make every result look more significant), never exceeding 1.
    adjustedPValues = [min(1.0, float(p) * n) for p in pvalues]
    return [adjustedPValues, test_values]
项目:lol_champ_recs    作者:Cpierse    | 项目源码 | 文件源码
def get_p_vals(role1,champ1,single_counts=True,span=3):
    """
    Chi-squared p-values comparing each of the top 3 recommendations with
    the `span` recommendations that follow it.

    Returns {role2: {idx: [p-value, ...]}} for idx in 1..3. Roles named
    'TOTAL' or 'DATA' map to an empty dict. When the role's 'N' is not
    greater than 10 the p-value is recorded as 1 without testing. With
    single_counts, every positive count is reduced to 1 before summing.
    """
    champ1 = str(champ2id.get(champ1, champ1))
    p_vals = {}
    for role2 in recs[tier][role1][champ1]:
        p_vals[role2] = {}
        if role2 in ('TOTAL', 'DATA'):
            continue
        for idx in range(1, 4):
            values = []
            for offset in range(span):
                pos_to_compare = idx + 1 + offset
                role_recs = recs[tier][role1][champ1][role2]
                # Champion ids of the two recommendations being compared
                first_id = str(champ2id[role_recs[idx]['champ']])
                second_id = str(champ2id[role_recs[pos_to_compare]['champ']])
                N = role_recs['N']
                if not N > 10:
                    values.append(1)
                    continue
                counts = sliding_count_recs[tier][role1][champ1][role2]['DATA']
                # Pad each count list with zeros up to length N
                first = np.array(counts[first_id] + [0] * (N - len(counts[first_id])))
                second = np.array(counts[second_id] + [0] * (N - len(counts[second_id])))
                if single_counts:
                    first[first > 0] = 1
                    second[second > 0] = 1
                first_hits = sum(first)
                second_hits = sum(second)
                contingency_mat = np.array([[first_hits, N - first_hits],
                                            [second_hits, N - second_hits]])
                values.append(chi2_contingency(contingency_mat)[1])
            p_vals[role2][idx] = values
    return p_vals
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def cramers_v_stat(confusion_matrix):
    """Calculate Cramér's V statistic for categorical-categorical association.

    ``confusion_matrix`` is a 2-D contingency table exposing ``.sum()`` and
    ``.shape`` (e.g. a NumPy array). Returns
    sqrt((chi2 / n) / min(rows - 1, columns - 1)).
    """
    # Cramér's V is defined on the uncorrected Pearson chi-squared statistic.
    # scipy applies Yates' continuity correction to 2x2 tables by default,
    # which would make a perfectly associated 2x2 table score below 1.
    chi2 = stats.chi2_contingency(confusion_matrix, correction=False)[0]
    n = confusion_matrix.sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    return math.sqrt(phi2 / min((r-1), (k-1)))
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def cramers_v_corrected_stat(confusion_matrix):
    """Calculate the bias-corrected Cramér's V for categorical-categorical association.

    Uses the correction from Wicher Bergsma, Journal of the Korean
    Statistical Society 42 (2013): 323-328.

    ``confusion_matrix`` is a 2-D contingency table exposing ``.sum()`` and
    ``.shape`` (e.g. a NumPy array).
    """
    # The statistic is built on the uncorrected Pearson chi-squared value.
    # scipy applies Yates' continuity correction to 2x2 tables by default,
    # so it is switched off explicitly.
    chi2 = stats.chi2_contingency(confusion_matrix, correction=False)[0]
    n = confusion_matrix.sum()
    phi2 = chi2 / n
    r, k = confusion_matrix.shape
    # Bias-corrected phi squared, clamped at zero
    phi2_corr = max(0, phi2 - ((k-1)*(r-1)) / (n-1))
    # Bias-corrected numbers of rows and columns
    r_corr = r - ((r-1)**2) / (n-1)
    k_corr = k - ((k-1)**2) / (n-1)
    return math.sqrt(phi2_corr / min((r_corr-1), (k_corr-1)))
项目:xam    作者:MaxHalford    | 项目源码 | 文件源码
def feature_importance_classification(features, target, n_neighbors=3, random_state=None):
    """Score each feature's association with a classification target.

    Floating-point columns of ``features`` are treated as continuous and
    scored with an F-test and mutual information. Integer and boolean
    columns are treated as discrete and scored with a chi-squared test,
    corrected Cramér's V and mutual information.

    ``n_neighbors`` and ``random_state`` are forwarded to
    ``feature_selection.mutual_info_classif``.

    Returns a tuple ``(cont_imp, disc_imp)`` of DataFrames indexed by the
    continuous and discrete column names respectively; a frame has no score
    columns when there are no features of its kind.
    """

    cont = features.select_dtypes(include=[np.floating])
    # `np.bool` was removed in NumPy 1.24; `np.bool_` is the boolean scalar type.
    disc = features.select_dtypes(include=[np.integer, np.bool_])

    cont_imp = pd.DataFrame(index=cont.columns)
    disc_imp = pd.DataFrame(index=disc.columns)

    # Continuous features
    if cont_imp.index.size > 0:

        # F-test
        f_test = feature_selection.f_classif(cont, target)
        cont_imp['f_statistic'] = f_test[0]
        cont_imp['f_p_value'] = f_test[1]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(cont, target, discrete_features=False,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        cont_imp['mutual_information'] = mut_inf

    # Discrete features
    if disc_imp.index.size > 0:

        # Chi²-test
        chi2_tests = defaultdict(dict)

        for feature in disc.columns:
            # Named `table` so the continuous frame `cont` is not shadowed.
            table = pd.crosstab(disc[feature], target)
            statistic, p_value, _, _ = stats.chi2_contingency(table)
            chi2_tests[feature]['chi2_statistic'] = statistic
            chi2_tests[feature]['chi2_p_value'] = p_value

        chi2_tests_df = pd.DataFrame.from_dict(chi2_tests, orient='index')
        disc_imp['chi2_statistic'] = chi2_tests_df['chi2_statistic']
        disc_imp['chi2_p_value'] = chi2_tests_df['chi2_p_value']

        # Cramér's V (corrected)
        # `DataFrame.iteritems` was removed in pandas 2.0; `items` is the
        # equivalent (column name, Series) iterator.
        disc_imp['cramers_v'] = [
            cramers_v_corrected_stat(pd.crosstab(feature, target).values)
            for _, feature in disc.items()
        ]

        # Mutual information
        mut_inf = feature_selection.mutual_info_classif(disc, target, discrete_features=True,
                                                        n_neighbors=n_neighbors,
                                                        random_state=random_state)
        disc_imp['mutual_information'] = mut_inf

    return cont_imp, disc_imp