Python scipy.stats module: spearmanr() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use scipy.stats.spearmanr().

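Before the project excerpts, here is a minimal, self-contained sketch of the basic call (the sample arrays are made up for illustration); spearmanr returns the rank correlation coefficient rho and a two-sided p-value:

import numpy as np
from scipy import stats

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 6, 7, 8, 7])

rho, p_value = stats.spearmanr(x, y)
print(rho, p_value)  # rho near 1 indicates a strong monotonic association
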
Project: KATE    Author: hugochan    | Project source | File source
def calc_word_sim(model, eval_file):
    df = pd.read_csv(eval_file, sep=',', header=0) # eval dataset
    col1, col2, score = df.columns.values
    model_vocab = model.vocab.keys()
    ground = []
    sys = []
    for idx, row in df.iterrows():
        if row[col1] in model_vocab and row[col2] in model_vocab:
            ground.append(float(row[score]))
            sys.append(model.similarity(row[col1], row[col2]))

    # compute Spearman's rank correlation coefficient (https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient)
    print(sys)
    # import pdb;pdb.set_trace()
    corr, p_val = stats.spearmanr(sys, ground)
    logger.info("# of pairs found: %s / %s" % (len(ground), len(df)))
    logger.info("correlation: %s" % corr)
    return corr, p_val
Project: kor2vec    Author: dongjun-Lee    | Project source | File source
def word_sim_test(filename, pos_vectors):
    delim = ','
    actual_sim_list, pred_sim_list = [], []
    missed = 0

    with open(filename, 'r') as pairs:
        for pair in pairs:
            w1, w2, actual_sim = pair.strip().split(delim)

            try:
                w1_vec = create_word_vector(w1, pos_vectors)
                w2_vec = create_word_vector(w2, pos_vectors)
                pred = float(np.inner(w1_vec, w2_vec))
                actual_sim_list.append(float(actual_sim))
                pred_sim_list.append(pred)

            except KeyError:
                missed += 1

    spearman, _ = st.spearmanr(actual_sim_list, pred_sim_list)
    pearson, _ = st.pearsonr(actual_sim_list, pred_sim_list)

    return spearman, pearson, missed
Project: SIF    Author: PrincetonML    | Project source | File source
def sim_getCorrelation(We,words,f, weight4ind, scoring_function, params):
    f = open(f,'r')
    lines = f.readlines()
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = data_io.prepare_data(seq1)
    x2,m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We,x1,x2,m1,m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: SIF    Author: PrincetonML    | Project source | File source
def getCorrelation(model,words,f, params=[]):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = data_io.prepare_data(seq1)
    x2,m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: CS-SMAF    Author: brian-cleary    | Project source | File source
def correlations(A,B,pc_n=100):
    p = (1 - distance.correlation(A.flatten(),B.flatten()))
    spear = spearmanr(A.flatten(),B.flatten())
    dist_genes = np.zeros(A.shape[0])
    for i in range(A.shape[0]):
        dist_genes[i] = 1 - distance.correlation(A[i],B[i])
    pg = (np.average(dist_genes[np.isfinite(dist_genes)]))
    dist_sample = np.zeros(A.shape[1])
    for i in range(A.shape[1]):
        dist_sample[i] = 1 - distance.correlation(A[:,i],B[:,i])
    ps = (np.average(dist_sample[np.isfinite(dist_sample)]))
    pc_dist = []
    if pc_n > 0:
        u0,s0,vt0 = np.linalg.svd(A)
        u,s,vt = np.linalg.svd(B)
        for i in range(pc_n):
            pc_dist.append(abs(1 - distance.cosine(u0[:,i],u[:,i])))
        pc_dist = np.array(pc_dist)
    return p,spear[0],pg,ps,pc_dist
Project: procgen    Author: juancroldan    | Project source | File source
def white4D_functional():
    print("Testing correlation for 4D white noise")
    N = 20
    x1 = randrange(-1000, 1000, 1)
    y1 = randrange(-1000, 1000, 1)
    z1 = randrange(-1000, 1000, 1)
    w1 = randrange(-1000, 1000, 1)
    x2 = x1 + randrange(-1000, 1000, 1)
    y2 = y1 + randrange(-1000, 1000, 1)
    z2 = z1 + randrange(-1000, 1000, 1)
    w2 = w1 + randrange(-1000, 1000, 1)
    values1 = [[[[combined(white, x/N, y/N) for x in range(x1, x1 + N)] for y in range(y1, y1 + N)] for z in range(z1, z1 + N)] for w in range(w1, w1 + N)]
    values2 = [[[[combined(white, x/N, y/N) for x in range(x2, x2 + N)] for y in range(y2, y2 + N)] for z in range(z2, z2 + N)] for w in range(w2, w2 + N)]
    rho = spearmanr(values1, values2, axis=None)
    assert abs(rho[0]) < 0.5
    print("rho = %s" % rho[0])
    print("\tNo significant correlation found")
Project: wordsim    Author: recski    | Project source | File source
def run(self):
        for d_type, datasets in self.sim_datasets.items():
            for data, fn in datasets:
                logging.info(
                    'testing on data {0} of type {1} ({2} pairs)'.format(
                        fn, d_type, len(data.pairs)))
                for e_type, models in self.e_models.iteritems():
                    for model, fn in models:
                        logging.info(
                            '\ttesting embedding {0} of type {1}'.format(
                                fn, e_type))
                        answers, gold_sims, oovs = [], [], 0
                        for (w1, w2), gold in data.pairs.items():
                            sim = model.get_sim(w1, w2)
                            if sim:
                                answers.append(sim)
                                gold_sims.append(gold)
                            else:
                                oovs += 1
                        corr = spearmanr(answers, gold_sims)
                        logging.info('Spearman correlation: {0}'.format(corr))
                        logging.info('pairs skipped (OOVs): {0}'.format(oovs))
Project: pyktrader2    Author: harveywwu    | Project source | File source
def MA_RIBBON(df, ma_series):
    ma_array = np.zeros([len(df), len(ma_series)])
    ema_list = []
    for idx, ma_len in enumerate(ma_series):
        ema_i = EMA(df, n = ma_len, field = 'close')
        ma_array[:, idx] = ema_i
        ema_list.append(ema_i)
    corr = np.empty(len(df))
    pval = np.empty(len(df))
    dist = np.empty(len(df))
    corr[:] = np.nan
    pval[:] = np.nan
    dist[:] = np.nan
    max_n = max(ma_series)
    for idy in range(len(df)):
        if idy >= max_n - 1:
            # rank-correlate the EMA values against a descending sequence:
            # +1 means the ribbon is perfectly stacked fast-to-slow
            corr[idy], pval[idy] = stats.spearmanr(ma_array[idy,:], range(len(ma_series), 0, -1))
            dist[idy] = max(ma_array[idy,:]) - min(ma_array[idy,:])
    corr_ts = pd.Series(corr*100, index = df.index, name = "MARIBBON_CORR")
    pval_ts = pd.Series(pval*100, index = df.index, name = "MARIBBON_PVAL")
    dist_ts = pd.Series(dist, index = df.index, name = "MARIBBON_DIST")
    return pd.concat([corr_ts, pval_ts, dist_ts] + ema_list, join='outer', axis=1)
Project: Hotpot    Author: Liang-Qiu    | Project source | File source
def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr,))
        print('%s Spearman: %f' % (name, sr,))
        print('%s MSE: %f' % (name, e,))
    return STSRes(pr, sr, e)
Project: Learning-sentence-representation-with-guidance-of-human-attention    Author: wangshaonan    | Project source | File source
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        # if the first token contains a single underscore (e.g. word_TAG), use the extended reader
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        else:
            X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: Learning-sentence-representation-with-guidance-of-human-attention    Author: wangshaonan    | Project source | File source
def getCorrelation2(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    sseq1 = []
    sseq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        sseq1.append(SX1)
        sseq2.append(SX2)
        golds.append(score)
    x1,m1,s1 = utils.prepare_data2(seq1,sseq1)
    x2,m2,s2 = utils.prepare_data2(seq2,sseq2)
    scores = model.scoring_function2(x1,x2,m1,m2,s1,s2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: mmfeat    Author: douwekiela    | Project source | File source
def spearman(self, dataset):
        if not isinstance(dataset, list) \
                or len(dataset) == 0 \
                or len(dataset[0]) != 3 \
                or not isinstance(dataset[0][2], float):
            raise TypeError('Dataset is not of correct type, list of [str, str, float] triples expected.')
        gs_scores, sys_scores = [], []
        for one, two, gs_score in dataset:
            try:
                sys_score = self.sim(one, two)
                gs_scores.append(gs_score)
                sys_scores.append(sys_score)
            except KeyError:
                if self.reportMissing:
                    print('Warning: Missing pair %s-%s - skipping' % (one, two))
                continue
        return spearmanr(gs_scores, sys_scores)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def get_corr_func(method):
    if method in ['kendall', 'spearman']:
        from scipy.stats import kendalltau, spearmanr

    def _pearson(a, b):
        return np.corrcoef(a, b)[0, 1]

    def _kendall(a, b):
        rs = kendalltau(a, b)
        if isinstance(rs, tuple):
            return rs[0]
        return rs

    def _spearman(a, b):
        return spearmanr(a, b)[0]

    _cor_methods = {
        'pearson': _pearson,
        'kendall': _kendall,
        'spearman': _spearman
    }
    return _cor_methods[method]
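
For context, a minimal usage sketch of this dispatcher, assuming numpy is imported as np (the arrays are made up): each returned function takes two 1-D arrays and returns a bare float.

corr = get_corr_func('spearman')
a = np.array([1.0, 2.0, 3.0, 4.0])
b = np.array([1.2, 1.9, 3.5, 3.7])
print(corr(a, b))  # 1.0, since the two arrays share the same rank order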
Project: BioNLP-2016    Author: cambridgeltl    | Project source | File source
def evaluate1Word(wv, reference):
    """Evaluate wv against reference, return (rho, count) where rwo is
    Spearman's rho and count is the number of reference word pairs
    that could be evaluated against.
    """
    count=0
    gold, predicted = [], []
    for words, sim in sorted(reference, key=lambda ws: ws[1]):
        if " " not in words[0] and " " not in words[1]:
            #print words[0],words[1]
            try:
                v1, v2 = wv[words[0]], wv[words[1]]
            except KeyError:
                count+=1
                continue
            #print words
            gold.append((words, sim))
            predicted.append((words, cosine(v1, v2)))

    simlist = lambda ws: [s for w,s in ws]
    rho, p = spearmanr(simlist(gold), simlist(predicted))
    print "Word not found in WordVector",count
    return (rho, len(gold))
Project: wub    Author: nanoporetech    | Project source | File source
def _corrfunc(x, y, **kws):
    """ Annotate grid with correaltion coefficient.
    Solution from http://stackoverflow.com/a/30942817
    """
    if args.c == 'spearman':
        r, _ = stats.spearmanr(x, y)
        corr_type = 'Rho'
    elif args.c == 'pearson':
        r, _ = stats.pearsonr(x, y)
        corr_type = 'r'
    else:
        raise Exception('Invalid correlation statistic.')
    correlations.append(r)
    ax = plotter.plt.gca()
    ax.annotate("{} = {:.2f}".format(corr_type, r),
                xy=(.1, .9), xycoords=ax.transAxes)
Project: kaggle-quora-solution-8th    Author: qqgeogor    | Project source | File source
def get_feature_importance(feature):
    import scipy.stats as sps
    import pandas as pd
    y_train = pd.read_csv('../data/train.csv')['is_duplicate']
    return sps.spearmanr(feature, y_train)[0]

# import pickle
# pickle.dump(X_train,open("data_train.pkl", 'wb'), protocol=2)
#
# data_file=['test_deptree','test_glove_sim_dist','test_pca_glove',
#            'test_pca_pattern','test_w2w','test_pos','test_pca_char']
#
# path='../test/'
# for it in range(6):
#     tmp=[]
#     flist=[item+str(it) for item in data_file]
#     test=np.empty((400000,0))
#     if it==5:
#         test=np.empty((345796,0))
#     for f in flist:
#         test=np.hstack([test,pd.read_pickle(path+f+'.pkl')])
#     pickle.dump(test,open('data_test{0}.pkl'.format(it),'wb'),protocol=2)
Project: iclr2016    Author: jwieting    | Project source | File source
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: neurobind    Author: Kyubyong    | Project source | File source
def validation_check():
    # Load graph
    g = Graph(is_training=False); print("Graph loaded")

    # Load data
    X, Y = load_data(mode="val")

    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir)); print("Restored!")

            # Get model
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]  # model name

            # Inference
            if not os.path.exists(hp.results): os.mkdir(hp.results)
            with open(os.path.join(hp.results, "validation_results.txt"), 'a') as fout:
                expected, predicted = [], []
                for step in range(len(X) // hp.batch_size):
                    x = X[step * hp.batch_size: (step + 1) * hp.batch_size]
                    y = Y[step * hp.batch_size: (step + 1) * hp.batch_size]

                    # predict intensities
                    logits = sess.run(g.logits, {g.x: x})

                    expected.extend(list(y))
                    predicted.extend(list(logits))

                # Get the Spearman coefficient
                score, _ = spearmanr(expected, predicted)
                fout.write("{}\t{}\n".format(mname, score))
Project: systematic-metafeatures    Author: fhpinto    | Project source | File source
def _calculate(self, input):
        input = input[~np.isnan(input).any(axis=1)]

        return spearmanr(input[:,0], input[:,1])[0]
Project: histwords    Author: williamleif    | Project source | File source
def series_corr(word_year_series_1, word_year_series_2, i_year_words, start_year=1900, end_year=2000, series_1_norms=None, series_2_norms=None):
    """
    Gets the per-year correlation between the two word time series.
    Words are included even if they have missing values for some years; those missing values are excluded from the years in question.
    """
    year_corrs = []
    year_ps = []
    years = range(start_year, end_year + 1)
    if start_year not in i_year_words:
        i_year_words = {year:i_year_words for year in years}
    if series_1_norms is None:
        series_1_norms = ([0 for year in years], [1 for year in years])
    if series_2_norms is None:
        series_2_norms = ([0 for year in years], [1 for year in years])
    for i in range(len(years)):
        year = years[i]
        s1 = []
        s2 = []
        for word in i_year_words[year]:
            if word in word_year_series_1 and word in word_year_series_2:
                if not np.isnan(word_year_series_1[word][year]) and not np.isnan(word_year_series_2[word][year]):
                    s1.append((word_year_series_1[word][year] - series_1_norms[0][i]) / series_1_norms[1][i])
                    s2.append((word_year_series_2[word][year] - series_2_norms[0][i]) / series_2_norms[1][i])
        corr, p = spearmanr(s1, s2)
        year_corrs.append(corr)
        year_ps.append(p)
    return year_corrs, year_ps
Project: MP-CNN-Variants    Author: tuzhucheng    | Project source | File source
def get_scores(self):
        self.model.eval()
        num_classes = self.dataset_cls.NUM_CLASSES
        predict_classes = torch.arange(1, num_classes + 1).expand(self.batch_size, num_classes)
        test_kl_div_loss = 0
        predictions = []
        true_labels = []

        for batch in self.data_loader:
            output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
            test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
            # handle last batch which might have smaller size
            if len(predict_classes) != len(batch.sentence_1):
                predict_classes = torch.arange(1, num_classes + 1).expand(len(batch.sentence_1), num_classes)

            if self.data_loader.device != -1:
                with torch.cuda.device(self.device):
                    predict_classes = predict_classes.cuda()

            true_labels.append((predict_classes * batch.label.data).sum(dim=1))
            predictions.append((predict_classes * output.data.exp()).sum(dim=1))

            del output

        predictions = torch.cat(predictions).cpu().numpy()
        true_labels = torch.cat(true_labels).cpu().numpy()
        test_kl_div_loss /= len(batch.dataset.examples)
        pearson_r = pearsonr(predictions, true_labels)[0]
        spearman_r = spearmanr(predictions, true_labels)[0]

        return [pearson_r, spearman_r, test_kl_div_loss], ['pearson_r', 'spearman_r', 'KL-divergence loss']
Project: aes    Author: feidong1991    | Project source | File source
def spearman(y_true, y_pred):
    """
    Calculate Spearman's rank correlation coefficient between ``y_true`` and
    ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Spearman's rank correlation coefficient if well-defined, else 0
    """
    ret_score = spearmanr(y_true, y_pred)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
Project: CS-SMAF    Author: brian-cleary    | Project source | File source
def compare_distances(A,B,random_samples=[],s=200,pvalues=False):
    if len(random_samples) == 0:
        random_samples = np.zeros(A.shape[1],dtype=np.bool)
        random_samples[:min(s,A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:,random_samples].T,'euclidean')
    dist_y = distance.pdist(B[:,random_samples].T,'euclidean')
    pear = pearsonr(dist_x,dist_y)
    spear = spearmanr(dist_x,dist_y)
    if pvalues:
        return pear,spear
    else:
        return pear[0],spear[0]
Project: MUSE    Author: MiuLab    | Project source | File source
def calAvgSimC(test_score, senseVec1, senseScore1,senseVec2, senseScore2):
  assert(len(senseVec1)==len(senseVec2))
  avgCos = []
  for t in range(len(senseVec1)):
    thisCos = []
    p1 = senseScore1[t]
    p2 = senseScore2[t]
    # expected cosine similarity over all sense pairs, weighted by the sense probabilities
    for i in range(len(senseVec1[t])):
      for j in range(len(senseVec2[t])):
        thisCos.append((1-cosine(senseVec1[t][i],senseVec2[t][j]))*p1[i]*p2[j])
    avgCos.append(np.sum(thisCos))
  return spearmanr(test_score, avgCos)[0]
Project: MUSE    Author: MiuLab    | Project source | File source
def calMaxSimC(test_score, senseVec1, senseScore1,senseVec2, senseScore2):
  assert(len(senseVec1)==len(senseVec2))
  avgCos = []
  for t in range(len(senseVec1)):
    # compare only each word's most probable sense
    i = np.argmax(senseScore1[t])
    j = np.argmax(senseScore2[t])
    thisCos = (1-cosine(senseVec1[t][i],senseVec2[t][j]))
    avgCos.append(thisCos)
  return spearmanr(test_score, avgCos)[0]
Project: procgen    Author: juancroldan    | Project source | File source
def white2D_functional():
    print("Testing correlation for 2D white noise")
    N = 100
    x1 = randrange(-1000, 1000, 1)
    y1 = randrange(-1000, 1000, 1)
    x2 = x1 + randrange(-1000, 1000, 1)
    y2 = y1 + randrange(-1000, 1000, 1)
    values1 = [[combined(white, x/N, y/N) for x in range(x1, x1 + N)] for y in range(y1, y1 + N)]
    values2 = [[combined(white, x/N, y/N) for x in range(x2, x2 + N)] for y in range(y2, y2 + N)]
    rho = spearmanr(values1, values2, axis=None)
    assert abs(rho[0]) < 0.5
    print("rho = %s" % rho[0])
    print("\tNo significant correlation found")
Project: procgen    Author: juancroldan    | Project source | File source
def white3D_functional():
    print("Testing correlation for 3D white noise")
    N = 100
    x1 = randrange(-1000, 1000, 1)
    y1 = randrange(-1000, 1000, 1)
    z1 = randrange(-1000, 1000, 1)
    x2 = x1 + randrange(-1000, 1000, 1)
    y2 = y1 + randrange(-1000, 1000, 1)
    z2 = z1 + randrange(-1000, 1000, 1)
    values1 = [[[combined(white, x/N, y/N) for x in range(x1, x1 + N)] for y in range(y1, y1 + N)] for z in range(z1, z1 + N)]
    values2 = [[[combined(white, x/N, y/N) for x in range(x2, x2 + N)] for y in range(y2, y2 + N)] for z in range(z2, z2 + N)]
    rho = spearmanr(values1, values2, axis=None)
    assert abs(rho[0]) < 0.5
    print("rho = %s" % rho[0])
    print("\tNo significant correlation found")
Project: wordsim    Author: recski    | Project source | File source
def spearman_scorer(estimator, X, y):
    logging.info('predicting ...')
    predicted = estimator.predict(X)  # predict from the features, not the labels
    return spearmanr(list(predicted), y)[0]  # a scorer must return a single float
Project: wordsim    Author: recski    | Project source | File source
def test():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s : " +
        "%(module)s (%(lineno)s) - %(levelname)s - %(message)s")

    data = [((f[0], f[1]), float(f[2]))
            for f in [line.strip().split("|||")
                      for line in open(sys.argv[1])]]

    print "sample data:", data[:3]

    train_data, devel_data, test_data = cut(data)

    logging.info('loading model...')
    glove_embedding = GloveEmbedding(sys.argv[2])
    logging.info('done!')
    dim = int(sys.argv[3])
    X_train = featurize(train_data, glove_embedding, dim)

    Y_train = np.array([e[1] for e in train_data])

    logging.info("Input shape: {0}".format(X_train.shape))
    print(X_train[:3])
    logging.info("Label shape: {0}".format(Y_train.shape))
    print(Y_train[:3])

    input_dim = X_train.shape[1]
    output_dim = 1
    model = create_model(input_dim, output_dim)
    model.fit(X_train, Y_train, nb_epoch=int(sys.argv[4]), batch_size=32)

    X_devel = featurize(devel_data, glove_embedding, dim)
    Y_devel = np.array([e[1] for e in devel_data])

    pred = model.predict_proba(X_devel, batch_size=32)
    corr = spearmanr(pred, Y_devel)
    print "Spearman's R: {0}".format(corr)
Project: wordsim    Author: recski    | Project source | File source
def evaluate(model, dev_data):
    pred = model.predict_proba(dev_data.data, batch_size=32)
    corr = spearmanr(pred, dev_data.labels)
    print "Spearman's R: {0}".format(corr)
Project: pyktrader2    Author: harveywwu    | Project source | File source
def ma_ribbon(df, ma_series):
    ma_array = np.zeros(len(ma_series))  # one slot per EMA in the ribbon
    for idx, ma_len in enumerate(ma_series):
        key = 'EMA_CLOSE_' + str(ma_len)
        ema(df, ma_len, field = 'close')
        ma_array[idx] = df[key][-1]
    corr, pval = stats.spearmanr(ma_array, range(len(ma_series), 0, -1))
    dist = max(ma_array) - min(ma_array)
    df["MARIBBON_CORR"][-1] = corr * 100
    df["MARIBBON_PVAL"][-1] = pval * 100
    df["MARIBBON_DIST"][-1] = dist
Project: 100knock2016    Author: tmu-nlp    | Project source | File source
def getSpearmanr(infile):
    x_list = list()
    y_list = list()
    for i, line in enumerate(open(infile, 'r')):
        words = line.strip('\n').split('\t')
        x_list.append((i, float(words[2])))
        y_list.append((i, float(words[3])))
    # rank the scores manually, then map each line's rank back to its original position
    x_list = sorted(x_list, key=lambda x: x[1])
    y_list = sorted(y_list, key=lambda x: x[1])
    x_list = sorted([(x, i) for i, (x, score) in enumerate(x_list)], key=lambda x: x[0])
    y_list = sorted([(y, i) for i, (y, score) in enumerate(y_list)], key=lambda x: x[0])
    x_list, y_list = np.array(x_list), np.array(y_list)
    # spearmanr on the ranks is equivalent to calling it on the raw scores
    rho, pval = spearmanr(x_list[:, 1], y_list[:, 1])
    return rho, pval
Project: aes-gated-word-char    Author: unkn0wnxx    | Project source | File source
def calc_correl(self, dev_pred, test_pred):
        dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
        test_prs, _ = pearsonr(test_pred, self.test_y_org)
        dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
        test_spr, _ = spearmanr(test_pred, self.test_y_org)
        dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
        test_tau, _ = kendalltau(test_pred, self.test_y_org)
        return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
Project: simec    Author: cod3licious    | Project source | File source
def check_similarity_match(X_embed, S):
    """
    Since SimEcs are supposed to project the data into an embedding space where the target similarities
    can be linearly approximated, check if X_embed*X_embed^T = S
    (check mean squared error and Spearman correlation coefficient)
    Inputs:
        - X_embed: Nxd matrix with coordinates in the embedding space
        - S: NxN matrix with target similarities (do whatever transformations were done before using this
             as input to the SimEc, e.g. centering, etc.)
    Returns:
        - msqe, rho, r: mean squared error, Spearman and Pearson correlation coefficient between the linear kernel of the embedding
                        and the target similarities (mean squared error is more exact, the correlations are more relaxed error measures)
    """
    # compute linear kernel as approximated similarities
    S_approx = X_embed.dot(X_embed.T)
    # to get results that are comparable across similarity measures, we have to normalize them somehow,
    # in this case by dividing by the absolute max value of the target similarity matrix
    n = np.max(np.abs(S))
    S_norm = S/n
    S_approx /= n
    # compute mean squared error
    msqe = np.mean((S_norm - S_approx) ** 2)
    # compute Spearman correlation coefficient
    rho = spearmanr(S_norm.flatten(), S_approx.flatten())[0]
    # compute Pearson correlation coefficient
    r = pearsonr(S_norm.flatten(), S_approx.flatten())[0]
    return msqe, rho, r
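
A quick sanity check of this helper, assuming numpy, spearmanr and pearsonr are imported as in the function above; the random embedding is illustrative only:

X_embed = np.random.randn(50, 5)   # 50 points in a 5-dimensional embedding
S = X_embed.dot(X_embed.T)         # target similarities realized exactly by X_embed
msqe, rho, r = check_similarity_match(X_embed, S)
# expect msqe close to 0 and rho, r close to 1, since S is exactly the linear kernel of X_embed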
Project: microTC    Author: INGEOTEC    | Project source | File source
def compute_score(self, conf, hy):
        conf['_r2'] = r2_score(self.test_y, hy)
        conf['_spearmanr'] = spearmanr(self.test_y, hy)[0]
        conf['_pearsonr'] = pearsonr(self.test_y, hy)[0]
        conf['_score'] = conf['_' + self.score]
        # print(conf)
Project: bear    Author: theeluwin    | Project source | File source
def profile(filepath, n, exact=True, save=False, verbose=True, use_gpu=False, report=open('temp.txt', 'w')):
    if exact:
        tol = 0
    else:
        tol = None
    solpath = 'data/{}_sol.dat'.format(filepath2name(filepath))
    if not os.path.isfile(solpath):
        solve(filepath, n, seed=0, verbose=verbose)
    q, r, ranks = pickle.load(open(solpath, 'rb'))
    if use_gpu:
        model_classes = [PPRIterativeTF, PPRLUDecompositionTF, PPRBearTF]
    else:
        model_classes = [PPRIterative, PPRLUDecomposition, PPRBear]
    for model_class in model_classes:
        with tf.Session() as sess:
            start = time.time()
            if use_gpu:
                model = model_class(sess, n, filepath, drop_tol=tol, verbose=verbose)
            else:
                model = model_class(drop_tol=tol, verbose=verbose)
                model.preprocess(filepath)
            end = time.time()
            if use_gpu:
                sess.run(tf.global_variables_initializer())
            elapsed = end - start
            if save:
                model.save('models/{}.ppr'.format(model.alias))
            print("[{}]({},{},n={})".format(model.alias, 'gpu' if use_gpu else 'cpu', 'exact' if exact else 'apprx', n), file=report)
            print("preprocess\t{}".format(elapsed), file=report)
            start = time.time()
            r_ = model.query(q)
            end = time.time()
            elapsed = end - start
            print("query time\t{}".format(elapsed), file=report)
            ranks_ = pr2ranks(r_)
            spearman = spearmanr(ranks, ranks_)
            r_ = r_ / r_.sum()
            print("diff norm\t{}".format(norm(r - r_)), file=report)
            print("cosine sim\t{}".format(r.dot(r_) / norm(r) / norm(r_)), file=report)
            print("spearman corr\t{}".format(spearman.correlation), file=report)
            print("", file=report)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def test_corr_rank(self):
        tm._skip_if_no_scipy()

        import scipy
        import scipy.stats as stats

        # kendall and spearman
        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        A[-5:] = A[:5]
        result = A.corr(B, method='kendall')
        expected = stats.kendalltau(A, B)[0]
        self.assertAlmostEqual(result, expected)

        result = A.corr(B, method='spearman')
        expected = stats.spearmanr(A, B)[0]
        self.assertAlmostEqual(result, expected)

        # these methods got rewritten in 0.8
        if scipy.__version__ < LooseVersion('0.9'):
            raise nose.SkipTest("skipping corr rank because of scipy version "
                                "{0}".format(scipy.__version__))

        # results from R
        A = Series(
            [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310, -
             0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606])
        B = Series(
            [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292,
             1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375])
        kexp = 0.4319297
        sexp = 0.5853767
        self.assertAlmostEqual(A.corr(B, method='kendall'), kexp)
        self.assertAlmostEqual(A.corr(B, method='spearman'), sexp)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia    | Project source | File source
def test_nancorr_spearman(self):
        tm.skip_if_no_package('scipy.stats')
        from scipy.stats import spearmanr
        targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
        targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
        self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
                                     method='spearman')
        targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
        targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
        self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
                                     method='spearman')
Project: nonce2vec    Author: minimalparts    | Project source | File source
def spearman(x,y):
    return stats.spearmanr(x, y)[0]



###########################################
# Start
###########################################
Project: RankIQA    Author: xialeiliu    | Project source | File source
def forward(self, bottom, top):
        """Compute the SROCC and LCC and output them to top."""
        #ipdb.set_trace()
        testPreds = bottom[0].data
        testPreds = np.reshape(testPreds,testPreds.shape[0])
        testLabels = bottom[1].data
        testLabels = np.reshape(testLabels,testLabels.shape[0])
        top[0].data[...] = stats.spearmanr(testPreds, testLabels)[0]
        top[1].data[...] = stats.pearsonr(testPreds, testLabels)[0]
Project: seq2seq_parser    Author: trangham283    | Project source | File source
def comp_corr(df, ptype):
    if ptype == 'begin':
        valid_df = df[df.p00 > 2]
    else:
        valid_df = df[df.p11 > 2]
    lengths = valid_df.span_length.values
    if ptype == 'begin':
        plengths = valid_df.p00.values
    else:
        plengths = valid_df.p11.values
    print(float(len(valid_df)) / len(df), '\t', stats.spearmanr(plengths, lengths)[0])
Project: ADEM    Author: mike-n-7    | Project source | File source
def _correlation(self, output, score):
        return [spearmanr(output, score), pearsonr(output, score)]
Project: nea    Author: nusnlp    | Project source | File source
def calc_correl(self, dev_pred, test_pred):
        dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
        test_prs, _ = pearsonr(test_pred, self.test_y_org)
        dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
        test_spr, _ = spearmanr(test_pred, self.test_y_org)
        dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
        test_tau, _ = kendalltau(test_pred, self.test_y_org)
        return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
Project: cptm    Author: NLeSC    | Project source | File source
def do_spearmanr(list1, list2, alpha=0.05):
    c, p = spearmanr(list1, list2)

    if p < alpha:
        return c
    return 'n.s.'
Project: 100knock2017    Author: tmu-nlp    | Project source | File source
def calcroh(file_name):
    human_list = list()
    pred_list = list()
    with open(file_name) as i_f:
        for line in i_f:
            # convert to float so spearmanr ranks the values numerically
            human_list.append(float(line.strip().split()[2]))
            pred_list.append(float(line.strip().split()[3]))
    return spearmanr(human_list, pred_list)
Project: 100knock2017    Author: tmu-nlp    | Project source | File source
def cal_spear(text):
    list_1 = []
    list_2 = []
    with open(text) as i_f:
        for line in i_f:
            # convert to float so spearmanr ranks the values numerically
            list_1.append(float(line.strip().split()[2]))
            list_2.append(float(line.strip().split()[3]))
    return spearmanr(list_1,list_2)
Project: kaggle-quora-solution-8th    Author: qqgeogor    | Project source | File source
def generate_indicator_(gram_q1,gram_q2,N):
    len_gram_q1 = list(map(len,gram_q1))
    len_gram_q2 = list(map(len,gram_q2))
    max_len = max(max(len_gram_q1),max(len_gram_q2))
    q1_indicator = np.zeros((N,max_len))
    q2_indicator = np.zeros((N,max_len))
    for i in tqdm(np.arange(N)):
        for j,w in enumerate(gram_q1[i]):
            if w in gram_q2[i]:
                q1_indicator[i,j] = 1
        for j,w in enumerate(gram_q2[i]):
            if w in gram_q1[i]:
                q2_indicator[i,j] = 1
    return q1_indicator,q2_indicator
    # sps.spearmanr(q1_indicator[:,1],y_train)[0]
Project: kaggle-quora-solution-8th    Author: qqgeogor    | Project source | File source
def calc_dis_jarccard2(neighs,neighs2):
    sim_fea = []
    for i in neighs:
        for j in neighs2:
            if i==j:continue
            if (j in index_q) and (i in index_q):
                q_str = index_q[i]
                nei_str = index_q[j]
                s1 = set(q_str.lower().split())
                s2 = set(nei_str.lower().split())
                sim_fea.append(dist_utils._jaccard_coef(s1, s2))
    aggregation_mode = ["mean", "std", "max", "min", "median"]
    aggregator = [None if m == "" else getattr(np, m) for m in aggregation_mode]
    score = []
    for agg in aggregator:
        # fall back to -1 when no similarities were collected; otherwise aggregate
        if len(sim_fea) == 0:
            score.append(-1)
            continue
        try:
            s = agg(sim_fea)
        except Exception:
            s = -1
        score.append(s)
    return score


# sps.spearmanr(train_fea,train['is_duplicate'])[0]
Project: kaggle-quora-solution-8th    Author: qqgeogor    | Project source | File source
def drop_feature(data):
    drop_list = []
    for i in range(data.shape[1]):
        for j in range(i + 1, data.shape[1]):  # start at i + 1: a column's self-correlation is always 1
            s = sps.spearmanr(data[:,i], data[:,j])[0]
            if abs(s) > 0.8:
                drop_list.append(j)
    drop_list = set(drop_list)
    return  drop_list


# select important features