Python scipy.stats module: kendalltau() example source code

The following 20 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.kendalltau().
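
Before the project examples, a minimal sketch of the call itself (the data below is made up): scipy.stats.kendalltau takes two equal-length sequences and returns Kendall's tau together with a two-sided p-value.

from scipy.stats import kendalltau

x = [1, 2, 3, 4, 5]
y = [2, 1, 4, 3, 5]
tau, p_value = kendalltau(x, y)  # tau lies in [-1, 1]; the p-value tests the null of no association
print(tau, p_value)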

Project: reco    Author: mayukh18    | Project source | File source
def kendalltau(rankA, rankB):
    # Use scipy's implementation under an alias so this wrapper does not
    # call itself recursively through the shared name.
    from scipy.stats import kendalltau as scipy_kendalltau

    if len(rankA) != len(rankB):
        raise TypeError("The two rank lists must be of the same length.")

    N = len(rankA)

    # If the rankings are given as (item, score) tuples, keep only the items.
    if isinstance(rankA[0], tuple):
        rankA = [rankA[i][0] for i in range(N)]

    if isinstance(rankB[0], tuple):
        rankB = [rankB[i][0] for i in range(N)]

    # Convert both rankings to positional indices relative to rankA's order.
    listA = [i for i in range(N)]
    listB = [rankB.index(rankA[i]) for i in range(N)]

    return scipy_kendalltau(listA, listB)[0]
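
A hypothetical call to the wrapper above (the item names are invented); each argument is one ranking of the same set of items, and the result is scipy's tau between the two orderings.

ranking_a = ["itemA", "itemB", "itemC", "itemD"]
ranking_b = ["itemB", "itemA", "itemC", "itemD"]
print(kendalltau(ranking_a, ranking_b))  # about 0.67: one swapped pair out of six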
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def get_corr_func(method):
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
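
A small, assumed usage of the helper above (the arrays are made up; get_corr_func from the snippet is assumed to be in scope): the returned callable yields only the correlation coefficient for the chosen method.

import numpy as np

a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.5, 1.0, 3.5, 4.0])
kendall = get_corr_func('kendall')
print(kendall(a, b))  # Kendall's tau only; the p-value is dropped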
Project: mixedvines    Author: asnelt    | Project source | File source
def _heuristic_element_order(samples):
        '''
        Finds an order of elements that heuristically facilitates vine
        modelling.  For this purpose, Kendall's tau is calculated between
        samples of pairs of elements and elements are scored according to the
        sum of absolute Kendall's taus of pairs the elements appear in.

        Parameters
        ----------
        samples : array_like
            n-by-d matrix of samples where n is the number of samples and d is
            the number of marginals.

        Returns
        -------
        order : array_like
            Permutation of all element indices reflecting descending scores.
        '''
        dim = samples.shape[1]
        # Score elements according to total absolute Kendall's tau
        score = np.zeros(dim)
        for i in range(1, dim):
            for j in range(i):
                tau, _ = kendalltau(samples[:, i], samples[:, j])
                score[i] += np.abs(tau)
                score[j] += np.abs(tau)
        # Get order indices for descending score
        order = score.argsort()[::-1]
        return order
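
A minimal, assumed usage of the helper above (in the project it sits inside a class; here it is called directly for illustration, with numpy and scipy.stats.kendalltau assumed to be imported as in the project).

import numpy as np

rng = np.random.RandomState(0)
x = rng.normal(size=1000)
# Column 1 is strongly associated with column 0; column 2 is independent noise.
samples = np.column_stack([x, x + 0.1 * rng.normal(size=1000), rng.normal(size=1000)])
print(_heuristic_element_order(samples))  # the correlated pair scores highest, the noise column comes last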
Project: aes    Author: feidong1991    | Project source | File source
def kendall_tau(y_true, y_pred):
    """
    Calculate Kendall's tau between ``y_true`` and ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Kendall's tau if well-defined, else 0
    """
    ret_score = kendalltau(y_true, y_pred)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
Project: vwoptimize    Author: denik    | Project source | File source
def kendall_tau(y_true, y_score):
    from scipy.stats import kendalltau
    ret_score = kendalltau(y_true, y_score)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
Project: aes-gated-word-char    Author: unkn0wnxx    | Project source | File source
def calc_correl(self, dev_pred, test_pred):
        dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
        test_prs, _ = pearsonr(test_pred, self.test_y_org)
        dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
        test_spr, _ = spearmanr(test_pred, self.test_y_org)
        dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
        test_tau, _ = kendalltau(test_pred, self.test_y_org)
        return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
Project: deepcpg    Author: cangermueller    | Project source | File source
def kendall(y, z, nb_sample=100000):
    """Compute Kendall's correlation coefficient."""
    if len(y) > nb_sample:
        idx = np.arange(len(y))
        np.random.shuffle(idx)
        idx = idx[:nb_sample]
        y = y[idx]
        z = z[idx]
    return kendalltau(y, z)[0]
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def test_corr_rank(self):
        tm._skip_if_no_scipy()

        import scipy
        import scipy.stats as stats

        # kendall and spearman
        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        A[-5:] = A[:5]
        result = A.corr(B, method='kendall')
        expected = stats.kendalltau(A, B)[0]
        self.assertAlmostEqual(result, expected)

        result = A.corr(B, method='spearman')
        expected = stats.spearmanr(A, B)[0]
        self.assertAlmostEqual(result, expected)

        # these methods got rewritten in 0.8
        if scipy.__version__ < LooseVersion('0.9'):
            raise nose.SkipTest("skipping corr rank because of scipy version "
                                "{0}".format(scipy.__version__))

        # results from R
        A = Series(
            [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310,
             -0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
        B = Series(
            [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
             1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
        kexp = 0.4319297
        sexp = 0.5853767
        self.assertAlmostEqual(A.corr(B, method='kendall'), kexp)
        self.assertAlmostEqual(A.corr(B, method='spearman'), sexp)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def test_nancorr_kendall(self):
        tm.skip_if_no_package('scipy.stats')
        from scipy.stats import kendalltau
        targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
        targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
                                     method='kendall')
        targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
        targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
                                     method='kendall')
Project: virtual-competition    Author: mechaphish    | Project source | File source
def compare_scores(byus, bydarpa):
    """byus = { team: score }, bydarpa = { team: score }"""
    assert frozenset(byus.keys()) == frozenset(bydarpa.keys())

    our_ranking = ordered_sets(byus)
    darpa_ranking = ordered_sets(bydarpa)

    our_picks = our_ranking.values()[0]
    darpa_picks = darpa_ranking.values()[0]

    from scipy import stats
    # scipy takes them as ordered lists
    teamorder = list(byus.keys())  
    vals_us = [ byus[t] for t in teamorder ]
    vals_darpa = [ bydarpa[t] for t in teamorder ]
    tau, p_value = stats.kendalltau(vals_us, vals_darpa)

    def names(teams_set):
        return '[' + ' '.join(sorted(n.split()[0] for n in teams_set)) + ']'

    if our_picks == darpa_picks:
        print "[  ] All first choice(s)",names(our_picks),"match, excellent!"
    elif our_picks.isdisjoint(darpa_picks):
        print "[XX] Our first choice(s)",names(our_picks)," completely different from DARPA's",names(darpa_picks)
    else:
        print "[__] Partial match between our first choice(s) and DARPA's. Both have",names(darpa_picks&our_picks),"(we also have:",names(our_picks-darpa_picks)," -- darpa also has:",names(darpa_picks-our_picks),")"
    print "     FOR US:"
    for score,teams in our_ranking.iteritems():
        print "       ","%+.4f"%score,names(teams)
    print "     DARPA:"
    for score,teams in darpa_ranking.iteritems():
        print "       ","%+.4f"%score,names(teams)
    print "  %s Kendall tau: %.4f (p-value for being correlated: %.6f)" % (("<7" if tau < 0.7 else "<8") if tau < 0.8 else "  ", tau, p_value)
Project: document-qa    Author: allenai    | Project source | File source
def evaluate(self, data: List[ContextAndQuestion], true_len, **kwargs):
        best_spans = kwargs["span"]
        span_logits = kwargs["score"]
        if self.eval == "triviaqa":
            scores = trivia_span_scores(data, best_spans)
        elif self.eval == "squad":
            scores = squad_span_scores(data, best_spans)
        else:
            raise RuntimeError()

        has_answer = np.array([len(x.answer.answer_spans) > 0 for x in data])

        selected_paragraphs = {}
        for i, point in enumerate(data):
            if self.per_doc:
                key = (point.question_id, point.doc_id)
            else:
                key = point.question_id
            if key not in selected_paragraphs:
                selected_paragraphs[key] = i
            elif span_logits[i] > span_logits[selected_paragraphs[key]]:
                selected_paragraphs[key] = i
        selected_paragraphs = list(selected_paragraphs.values())

        out = {
            "question-text-em": scores[selected_paragraphs, 2].mean(),
            "question-text-f1": scores[selected_paragraphs, 3].mean(),
        }

        if self.k_tau:
            out["text-em-k-tau"] = kendalltau(span_logits, scores[:, 2])[0]
            out["text-f1-k-tau"] = kendalltau(span_logits, scores[:, 3])[0]

        if self.paragraph_level:
            out["paragraph-text-em"] = scores[has_answer, 2].mean()
            out["paragraph-text-f1"] = scores[has_answer, 3].mean()

        prefix = "b%d/" % self.bound
        return Evaluation({prefix+k: v for k,v in out.items()})
Project: document-qa    Author: allenai    | Project source | File source
def evaluate(self, data: List[ContextAndQuestion], true_len, **kargs):
        if self.text_eval == "triviaqa":
            scores = trivia_span_scores(data, kargs["spans"])
        elif self.text_eval == "squad":
            scores = squad_span_scores(data, kargs["spans"])
        else:
            raise RuntimeError()

        has_answer = [len(x.answer.answer_spans) > 0 for x in data]
        aggregated_scores = scores[has_answer].mean(axis=0)
        prefix ="b%d/" % self.bound
        scalars = {
            prefix + "accuracy": aggregated_scores[0],
            prefix + "f1": aggregated_scores[1],
            prefix + "text-accuracy": aggregated_scores[2],
            prefix + "text-f1": aggregated_scores[3]
        }

        if self.rank_metric == "spr":
            metric = spearmanr
        elif self.rank_metric == "k-tau":
            metric = kendalltau
        else:
            raise ValueError()

        if "none_prob" in kargs:
            none_conf = kargs["none_prob"]
            scalars[prefix + "none-text-f1-" + self.rank_metric] = metric(none_conf, scores[:, 3])[0]
            scalars[prefix + "none-span-accuracy-" + self.rank_metric] = metric(none_conf, scores[:, 0])[0]

        conf = kargs["conf"]
        scalars[prefix + "score-text-f1-" + self.rank_metric] = metric(conf, scores[:, 3])[0]
        scalars[prefix + "score-span-accuracy-" + self.rank_metric] = metric(conf, scores[:, 0])[0]
        return Evaluation(scalars)
Project: hash2vec    Author: Roj    | Project source | File source
def distance(a,b):
    #return  1-dot(norm(a),norm(b)) #cosine similarity
    #return sum(pow(a[i]-b[i],2) for i in range(len(b))) #euclidean norm
    #negated pearson correlation so that lower is better
    #return 1- dot(norm(a),norm(b))
    #tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    return 1-sci.kendalltau(a,b)[0] #kendall tau



# Load the benchmark
Project: hash2vec    Author: Roj    | Project source | File source
def distance(a,b):
    return scipy.spatial.distance.cosine(a,b) # already includes the 1 - cos(a, b) form
    #return sum(pow(a[i]-b[i],2) for i in range(len(b))) #euclidean norm
    #negated pearson correlation so that lower is better
    #return 1- dot(norm(a),norm(b))
    #tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    #return sci.kendalltau(a,b) #kendall tau



# Load the benchmark
Project: hash2vec    Author: Roj    | Project source | File source
def distance(a,b):
    return  1-dot(norm(a),norm(b)) #cosine similarity
    #return sum(pow(a[i]-b[i],2) for i in range(len(b))) #euclidean norm
    #negated pearson correlation so that lower is better
    #return 1- dot(norm(a),norm(b))
    #tanimoto distance
    #return 1 - dot(a,b)/(dot(a,a) + dot(b,b) - dot(a,b))
    #return sci.kendalltau(a,b) #kendall tau



# Load the benchmark
Project: AppsOfDataAnalysis    Author: nhanloukiala    | Project source | File source
def select(self, X, Y, select_count=100):
        corr = []

        for i in range(X.shape[1]):
            kd = kendalltau(X[:, i], Y)
            corr.append((i, abs(kd.correlation)))

        # Sort by absolute correlation, descending, and keep the top select_count features.
        corr = sorted(corr, key=operator.itemgetter(1), reverse=True)[0:select_count]
        indices = [x for x, y in corr]
        return X[:, indices], indices
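
For context, a self-contained sketch of the same idea with illustrative names (not from the project): rank the columns of X by |tau| against Y and keep the top k.

import numpy as np
from scipy.stats import kendalltau

def top_k_by_kendall(X, Y, k=3):
    # Score every feature by the absolute Kendall's tau against the target.
    scored = [(i, abs(kendalltau(X[:, i], Y)[0])) for i in range(X.shape[1])]
    scored.sort(key=lambda item: item[1], reverse=True)
    keep = [i for i, _ in scored[:k]]
    return X[:, keep], keep

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 10))
Y = X[:, 3] + 0.1 * rng.normal(size=200)
_, kept = top_k_by_kendall(X, Y, k=3)
print(kept)  # should contain column 3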
Project: nea    Author: nusnlp    | Project source | File source
def calc_correl(self, dev_pred, test_pred):
        dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
        test_prs, _ = pearsonr(test_pred, self.test_y_org)
        dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
        test_spr, _ = spearmanr(test_pred, self.test_y_org)
        dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
        test_tau, _ = kendalltau(test_pred, self.test_y_org)
        return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
Project: cptm    Author: NLeSC    | Project source | File source
def do_kendallt(list1, list2, alpha=0.05):
    c, p = kendalltau(list1, list2)

    if p < alpha:
        return c
    return 'n.s.'
Project: Stock-Prediction-Time-Series-Analysis-Python    Author: Nekooeimehr    | Project source | File source
def Conf_Measure(RegModel, Train_Data, True_Labels, ModelType):
    Predictions = RegModel.predict(Train_Data)        
    tau, p_value = stats.kendalltau(True_Labels, Predictions)
    R2_Measure = r2_score(True_Labels, Predictions)
    print('The Kendall coefficient of ', ModelType, ' model is ', tau, ' with a p-value of ', p_value)
    print('The R Square of ', ModelType, ' model is ', R2_Measure)
    print('')
    return(tau, p_value, R2_Measure)
Project: microbiomeHD    Author: cduvallet    | Project source | File source
def concordance(series1, series2, method, nreps=1000):
    """
    Measures the concordance between two pandas Series and returns a pvalue
    and measure of concordance.

    Parameters
    ----------
    series1, series2 : pandas Series
        Series with matching indexes.
    method : str
        ['fisher', 'spearman', 'kendalltau', 'empirical', 'cohen']
    nreps : int
        Number of repetitions used to build the null. Only needed if method is
        'empirical'.

    Returns
    -------
    measure : float
        some sort of measure of concordance (e.g. r for the correlation
        methods, n_observed - mean(n_expected) for empirical, etc)
    p : float
        p value of observed concordance between series1 and series2
    """

    if method == 'fisher':
        # Note: this automatically ignores any bugs which were not present
        # in both series.
        mat = pd.crosstab(series1, series2)
        return fisher_exact(mat)

    elif method == 'spearman':
        return spearmanr(series1, series2)

    elif method == 'kendalltau':
        return kendalltau(series1, series2, nan_policy='omit')

    elif method == 'empirical':
        return empirical_pval(series1, series2, nreps)

    elif method == 'cohen':
        tmp = pd.concat((series1, series2), axis=1).dropna()
        return cohen_kappa_score(tmp.iloc[:, 0], tmp.iloc[:, 1]), np.nan

    else:
        raise ValueError('Unknown concordance method.')
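
A hedged usage sketch for the 'kendalltau' branch above (synthetic data; assumes the concordance function and its pandas/scipy imports are in scope).

import pandas as pd

s1 = pd.Series([1.0, 2.0, 3.0, 4.0], index=['a', 'b', 'c', 'd'])
s2 = pd.Series([1.2, 1.9, 2.5, 4.4], index=['a', 'b', 'c', 'd'])
tau, p = concordance(s1, s2, method='kendalltau')
print(tau, p)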