Python scipy.stats module: pearsonr() example source code

The following 50 code examples, extracted from open-source Python projects, illustrate how to use scipy.stats.pearsonr().

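pearsonr(x, y) returns a 2-tuple: the Pearson correlation coefficient in [-1, 1] and the two-sided p-value. A minimal self-contained example with toy data:

from scipy.stats import pearsonr

x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 6]
r, p = pearsonr(x, y)
print(r, p)  # r close to 1 for this strongly increasing pair
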
Project: kaggle-review | Author: daxiongshu | Project source | File source
def corr_fea(df, cols, de=None, bar=0.9):
    # For each highly (anti-)correlated pair of columns (|r| > bar), add a
    # difference/sum feature in place and collect the redundant column names.
    from scipy.stats import pearsonr
    xcols = []
    for c,i in enumerate(cols[:-1]):
        for j in cols[c+1:]:
            if i==j:
                continue
            #score = pearsonr(df[i],df[j])[0]
            score = df[i].corr(df[j])
            #print(i,j,score)
            if score>bar:
                df["%s-%s"%(i,j)] = df[i]-df[j]
                if de is not None:
                    de["%s-%s"%(i,j)] = de[i]-de[j]
                xcols.append(j)
            if score<-bar:
                df["%s+%s"%(i,j)] = df[i]+df[j]
                if de is not None:
                    de["%s+%s"%(i,j)] = de[i]+de[j]
                xcols.append(j)
    return xcols
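A usage sketch for corr_fea with a hypothetical DataFrame (the values below are made up): pairs correlated above bar yield a difference feature, pairs below -bar a sum feature, and the redundant columns are returned for dropping.

import pandas as pd

df = pd.DataFrame({'a': [1.0, 2.0, 3.0, 4.0],
                   'b': [1.1, 2.1, 2.9, 4.2],   # nearly identical to 'a'
                   'c': [4.0, 1.0, 3.0, 2.0]})
dropped = corr_fea(df, ['a', 'b', 'c'], bar=0.9)
print(dropped)                 # ['b']: flagged as redundant
print(df.columns.tolist())     # df was mutated in place and now contains 'a-b'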
Project: kor2vec | Author: dongjun-Lee | Project source | File source
def word_sim_test(filename, pos_vectors):
    delim = ','
    actual_sim_list, pred_sim_list = [], []
    missed = 0

    with open(filename, 'r') as pairs:
        for pair in pairs:
            w1, w2, actual_sim = pair.strip().split(delim)

            try:
                w1_vec = create_word_vector(w1, pos_vectors)
                w2_vec = create_word_vector(w2, pos_vectors)
                pred = float(np.inner(w1_vec, w2_vec))
                actual_sim_list.append(float(actual_sim))
                pred_sim_list.append(pred)

            except KeyError:
                missed += 1

    spearman, _ = st.spearmanr(actual_sim_list, pred_sim_list)
    pearson, _ = st.pearsonr(actual_sim_list, pred_sim_list)

    return spearman, pearson, missed
Project: SIF | Author: PrincetonML | Project source | File source
def sim_getCorrelation(We,words,f, weight4ind, scoring_function, params):
    f = open(f,'r')
    lines = f.readlines()
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = data_io.prepare_data(seq1)
    x2,m2 = data_io.prepare_data(seq2)
    m1 = data_io.seq2weight(x1, m1, weight4ind)
    m2 = data_io.seq2weight(x2, m2, weight4ind)
    scores = scoring_function(We,x1,x2,m1,m2, params)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: SIF | Author: PrincetonML | Project source | File source
def getCorrelation(model,words,f, params=[]):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = data_io.getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = data_io.prepare_data(seq1)
    x2,m2 = data_io.prepare_data(seq2)
    if params and params.weightfile:
        m1 = data_io.seq2weight(x1, m1, params.weight4ind)
        m2 = data_io.seq2weight(x2, m2, params.weight4ind)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: lstm_handwriting | Author: shubh24 | Project source | File source
def get_pearson_coeff(similar_stroke):
    stroke1 = similar_stroke[0]
    stroke2 = similar_stroke[1]
    min_len = min(len(stroke1), len(stroke2))
    sx1 = [stroke1[i][0] for i in range(0, min_len)]
    sx2 = [stroke2[i][0] for i in range(0, min_len)]
    sy1 = [stroke1[i][1] for i in range(0, min_len)]
    sy2 = [stroke2[i][1] for i in range(0, min_len)]

    x_pearson = pearsonr(sx1, sx2)[0]  # correlate the x coordinates of the two strokes
    y_pearson = pearsonr(sy1, sy2)[0]  # correlate the y coordinates of the two strokes

    if x_pearson > 0.5 or y_pearson > 0.5:
        print(similar_stroke[2], similar_stroke[3])
        print(x_pearson, y_pearson)
        plt.plot(sx1, label = "Stroke 1 X Co-ordinate")
        plt.plot(sx2, label = "Stroke 2 X Co-ordinate")
        plt.plot(sy1, label = "Stroke 1 Y Co-ordinate")
        plt.plot(sy2, label = "Stroke 2 Y Co-ordinate")
        plt.legend(ncol= 2, fancybox=True)
        plt.show()
Project: Hotpot | Author: Liang-Qiu | Project source | File source
def eval_sts(ycat, y, name, quiet=False):
    """ Evaluate given STS regression-classification predictions and print results. """
    if ycat.ndim == 1:
        ypred = ycat
    else:
        ypred = loader.sts_categorical2labels(ycat)
    if y.ndim == 1:
        ygold = y
    else:
        ygold = loader.sts_categorical2labels(y)
    pr = pearsonr(ypred, ygold)[0]
    sr = spearmanr(ypred, ygold)[0]
    e = mse(ypred, ygold)
    if not quiet:
        print('%s Pearson: %f' % (name, pr,))
        print('%s Spearman: %f' % (name, sr,))
        print('%s MSE: %f' % (name, e,))
    return STSRes(pr, sr, e)
Project: Learning-sentence-representation-with-guidance-of-human-attention | Author: wangshaonan | Project source | File source
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        if len(p1.split()[0].split('_')) == 2:
            X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        else:
            X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: Learning-sentence-representation-with-guidance-of-human-attention | Author: wangshaonan | Project source | File source
def getCorrelation2(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    sseq1 = []
    sseq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[1]; p2 = i[2]; score = float(i[0])
        X1, X2, SX1, SX2 = getSeqs2(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        sseq1.append(SX1)
        sseq2.append(SX2)
        golds.append(score)
    x1,m1,s1 = utils.prepare_data2(seq1,sseq1)
    x2,m2,s2 = utils.prepare_data2(seq2,sseq2)
    scores = model.scoring_function2(x1,x2,m1,m2,s1,s2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: rsmtool | Author: EducationalTestingService | Project source | File source
def test_compute_correlations_between_versions_default_columns():
    df_old = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                           'feature1': [1.3, 1.5, 2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'sc1': [2, 3, 4]})
    df_new = pd.DataFrame({'spkitemid': ['a', 'b', 'c'],
                           'feature1': [-1.3, -1.5, -2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'sc1': [2, 3, 4]})
    df_cors = compute_correlations_between_versions(df_old, df_new)
    assert_equal(df_cors.get_value('feature1', 'old_new'), -1.0)
    assert_equal(df_cors.get_value('feature2', 'old_new'), 1.0)
    assert_equal(df_cors.get_value('feature1', 'human_old'), pearsonr(df_old['feature1'],
                                                                      df_old['sc1'])[0])
    assert_equal(df_cors.get_value('feature1', 'human_new'), pearsonr(df_new['feature1'],
                                                                      df_new['sc1'])[0])
    assert_equal(df_cors.get_value('feature1', "N"), 3)
Project: rsmtool | Author: EducationalTestingService | Project source | File source
def test_compute_correlations_between_versions_custom_columns():
    df_old = pd.DataFrame({'id': ['a', 'b', 'c'],
                           'feature1': [1.3, 1.5, 2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'r1': [2, 3, 4]})
    df_new = pd.DataFrame({'id': ['a', 'b', 'c'],
                           'feature1': [-1.3, -1.5, -2.1],
                           'feature2': [1.1, 6.2, 2.1],
                           'r1': [2, 3, 4]})

    df_cors = compute_correlations_between_versions(df_old,
                                                    df_new,
                                                    human_score='r1',
                                                    id_column='id')

    assert_equal(df_cors.get_value('feature1', 'old_new'), -1.0)
    assert_equal(df_cors.get_value('feature2', 'old_new'), 1.0)
    assert_equal(df_cors.get_value('feature1', 'human_old'), pearsonr(df_old['feature1'],
                                                                      df_old['r1'])[0])
    assert_equal(df_cors.get_value('feature1', 'human_new'), pearsonr(df_new['feature1'],
                                                                      df_new['r1'])[0])
    assert_equal(df_cors.get_value('feature1', "N"), 3)
Project: reuters-docsim | Author: sujitpal | Project source | File source
def plot_correlation(X, Y, title, corr=None):
    if corr is None:
        corr, _ = stats.pearsonr(X, Y)
    # threshold at the 99th percentile of Y (the filter below is applied to X)
    thresh = np.percentile(Y, 99)
    X90 = X[X > thresh]
    Y90 = Y[X > thresh]
    sample = np.random.choice(X90.shape[0], size=100, replace=False)
    Xsample = X90[sample]
    Ysample = Y90[sample]
    plt.scatter(Xsample, Ysample, color="red")
    plt.xlim([np.min(Xsample), np.max(Xsample)])
    plt.ylim([np.min(Ysample), np.max(Ysample)])
    plt.title("{:s} (corr: {:.3f})".format(title, corr))
    plt.xlabel("X")
    plt.ylabel("Y")
Project: wub | Author: nanoporetech | Project source | File source
def _corrfunc(x, y, **kws):
    """ Annotate grid with correaltion coefficient.
    Solution from http://stackoverflow.com/a/30942817
    """
    if args.c == 'spearman':
        r, _ = stats.spearmanr(x, y)
        corr_type = 'Rho'
    elif args.c == 'pearson':
        r, _ = stats.pearsonr(x, y)
        corr_type = 'r'
    else:
        raise Exception('Invalid correlation statistic.')
    correlations.append(r)
    ax = plotter.plt.gca()
    ax.annotate("{} = {:.2f}".format(corr_type, r),
                xy=(.1, .9), xycoords=ax.transAxes)
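A hypothetical wiring of _corrfunc onto a seaborn PairGrid (df, the numeric DataFrame, is made up here; args.c, correlations, and plotter are the module-level objects the function already relies on):

import seaborn as sns

g = sns.PairGrid(df)
g.map_lower(plotter.plt.scatter, s=10)
g.map_lower(_corrfunc)  # annotates each lower panel with "Rho = ..." or "r = ..."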
Project: ConversationalQA | Author: btjhjeon | Project source | File source
def train_model(lrmodel, X, Y, devX, devY, devscores):
    """
    Train model, using pearsonr on dev for early stopping
    """
    done = False
    best = -1.0
    r = np.arange(1,6)

    while not done:
        # Every 100 epochs, check Pearson on development set
        lrmodel.fit(X, Y, verbose=2, shuffle=False, validation_data=(devX, devY))
        yhat = np.dot(lrmodel.predict_proba(devX, verbose=2), r)
        score = pearsonr(yhat, devscores)[0]
        if score > best:
            print(score)
            best = score
            bestlrmodel = copy.deepcopy(lrmodel)
        else:
            done = True

    yhat = np.dot(bestlrmodel.predict_proba(devX, verbose=2), r)
    score = pearsonr(yhat, devscores)[0]
    print('Dev Pearson: ' + str(score))
    return bestlrmodel
Project: muffnn | Author: civisanalytics | Project source | File source
def test_partial_fit():
    data = load_diabetes()
    clf = MLPRegressor(n_epochs=1)

    X, y = data['data'], data['target']

    for _ in range(30):
        clf.partial_fit(X, y)

    y_pred = clf.predict(X)
    assert pearsonr(y_pred, y)[0] > 0.5
Project: idea_relations | Author: Noahs-ARK | Project source | File source
def joint_plot(x, y, xlabel=None,
               ylabel=None, xlim=None, ylim=None,
               loc="best", color='#0485d1',
               size=8, markersize=50, kind="kde",
               scatter_color="r"):
    with sns.axes_style("darkgrid"):
        if xlabel and ylabel:
            g = SubsampleJointGrid(xlabel, ylabel,
                    data=DataFrame(data={xlabel: x, ylabel: y}),
                    space=0.1, ratio=2, size=size, xlim=xlim, ylim=ylim)
        else:
            g = SubsampleJointGrid(x, y, size=size,
                    space=0.1, ratio=2, xlim=xlim, ylim=ylim)
        g.plot_joint(sns.kdeplot, shade=True, cmap="Blues")
        g.plot_sub_joint(plt.scatter, 1000, s=20, c=scatter_color, alpha=0.3)
        g.plot_marginals(sns.distplot, kde=False, rug=False)
        g.annotate(ss.pearsonr, fontsize=25, template="{stat} = {val:.2g}\np = {p:.2g}")
        g.ax_joint.set_yticklabels(g.ax_joint.get_yticks())
        g.ax_joint.set_xticklabels(g.ax_joint.get_xticks())
    return g
Project: fitbit-analyzer | Author: 5agado | Project source | File source
def plotCorrelation(stats):
    #columnsToDrop = ['sleep_interval_max_len', 'sleep_interval_min_len',
    #                 'sleep_interval_avg_len', 'sleep_inefficiency',
    #                 'sleep_hours', 'total_hours']

    #stats = stats.drop(columnsToDrop, axis=1)

    g = sns.PairGrid(stats)
    def corrfunc(x, y, **kws):
        r, p = scipystats.pearsonr(x, y)
        ax = plt.gca()
        ax.annotate("r = {:.2f}".format(r),xy=(.1, .9), xycoords=ax.transAxes)
        ax.annotate("p = {:.2f}".format(p),xy=(.2, .8), xycoords=ax.transAxes)
        if p>0.04:
            ax.patch.set_alpha(0.1)

    g.map_upper(plt.scatter)
    g.map_diag(plt.hist)
    g.map_lower(sns.kdeplot, cmap="Blues_d")
    g.map_upper(corrfunc)
    plt.show()
Project: Default-Credit-Card-Prediction | Author: AlexPnt | Project source | File source
def pearson_correlation_matrix(X):
    """
    Computes the Pearson Correlation matrix

    Keyword arguments:
    X -- The feature vectors
    """

    n_features=len(X[0])
    correlation_matrix=np.zeros(shape=(n_features,n_features))
    for i in range(n_features):
        for j in range(n_features):
            pearson_corr=stats.pearsonr(X[:,i],X[:,j])[0]
            correlation_matrix[i][j]=pearson_corr

    return correlation_matrix
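For reference, NumPy can produce the same matrix in one vectorized call (equivalent up to floating-point round-off; np.corrcoef does not return p-values, which is the main reason to loop with pearsonr):

import numpy as np

correlation_matrix = np.corrcoef(X, rowvar=False)  # features in columns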
Project: Default-Credit-Card-Prediction | Author: AlexPnt | Project source | File source
def pearson_between_feature_class(X,y,threshold):
    """
    Computes the Pearson correlation between each feature and the target class and keeps the highly correlated features

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    threshold -- Threshold value used to decide which features to keep (above the threshold)
    """

    if verbose:
        print('\nPerforming Feature Selection based on the correlation between each feature and class ...')

    feature_indexes=[]
    for i in range(len(X[0])):
        if abs(stats.pearsonr(X[:,i],y)[0])>threshold:
            feature_indexes+=[i]

    if len(feature_indexes)!=0:
        return X[:,feature_indexes],feature_indexes     #return selected features and original index features
    else:
        return X,feature_indexes
Project: iclr2016 | Author: jwieting | Project source | File source
def getCorrelation(model,words,f):
    f = open(f,'r')
    lines = f.readlines()
    preds = []
    golds = []
    seq1 = []
    seq2 = []
    for i in lines:
        i = i.split("\t")
        p1 = i[0]; p2 = i[1]; score = float(i[2])
        X1, X2 = getSeqs(p1,p2,words)
        seq1.append(X1)
        seq2.append(X2)
        golds.append(score)
    x1,m1 = utils.prepare_data(seq1)
    x2,m2 = utils.prepare_data(seq2)
    scores = model.scoring_function(x1,x2,m1,m2)
    preds = np.squeeze(scores)
    return pearsonr(preds,golds)[0], spearmanr(preds,golds)[0]
Project: numerai | Author: gansanay | Project source | File source
def pearson(X, y):
    r = []
    p = []
    for c in X.columns:
        r_, p_ = pearsonr(X[c], y)
        r.append(r_)
        p.append(p_)
    dfr = pd.DataFrame(index=range(1, 1+len(X.columns)))
    dfr['pearson'] = r
    dfr['pearson_p'] = p
    return dfr
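A quick usage sketch with a tiny hypothetical DataFrame (assumes the pandas/scipy imports the snippet relies on):

import numpy as np
import pandas as pd

X = pd.DataFrame({'f1': [1, 2, 3, 4], 'f2': [4, 3, 2, 1]})
y = np.array([1.5, 2.5, 3.0, 4.5])
print(pearson(X, y))  # one row per feature: 'pearson' and 'pearson_p'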
Project: systematic-metafeatures | Author: fhpinto | Project source | File source
def _calculate(self, input):
        input = input[~np.isnan(input).any(axis=1)]

        return pearsonr(input[:,0], input[:,1])
Project: PersonalizedMultitaskLearning | Author: mitmedialab | Project source | File source
def calcCorrelation(df,col1,col2):
    x,y,n = discardNans(df,col1,col2)
    return stats.pearsonr(x, y)
Project: SentEval | Author: facebookresearch | Project source | File source
def run(self):
        self.nepoch = 0
        bestpr = -1
        early_stop_count = 0
        r = np.arange(1, 6)
        stop_train = False

        # Preparing data
        trainX, trainy, devX, devy, testX, testy = self.prepare_data(
            self.train['X'], self.train['y'],
            self.valid['X'], self.valid['y'],
            self.test['X'], self.test['y'])

        # Training
        while not stop_train and self.nepoch <= self.maxepoch:
            self.trainepoch(trainX, trainy, nepoches=50)
            yhat = np.dot(self.predict_proba(devX), r)
            pr = pearsonr(yhat, self.devscores)[0]
            # early stop on Pearson
            if pr > bestpr:
                bestpr = pr
                bestmodel = copy.deepcopy(self.model)
            elif self.early_stop:
                if early_stop_count >= 3:
                    stop_train = True
                early_stop_count += 1
        self.model = bestmodel

        yhat = np.dot(self.predict_proba(testX), r)

        return bestpr, yhat
Project: DriverPower | Author: smshuai | Project source | File source
def report_metrics(yhat, y):
    # report metrics of training set
    r2 = r2_score(y, yhat)
    var_exp = explained_variance_score(y, yhat)
    r = stats.pearsonr(yhat, y)[0]
    logger.info('Model metrics for training set: r2={:.2f}, Variance explained={:.2f}, Pearson\'s r={:.2f}'.format(r2, var_exp, r))
Project: quoll | Author: LanguageMachines | Project source | File source
def return_correlations(instances, labels):
    feature_correlation = {}
    nplabels = numpy.array(labels)
    for i in range(instances.shape[1]):
        feature_vals = instances[:,i].toarray().flatten()  # pearsonr expects a 1-D array
        corr,p = stats.pearsonr(feature_vals,nplabels)
        feature_correlation[i] = [corr,p]
    return feature_correlation
Project: quoll | Author: LanguageMachines | Project source | File source
def calculate_ordinal_correlation_feature_labels(instances,labels):
    # calculate correlation by feature
    feature_correlation = []
    for i in range(instances.shape[1]):
        feature_vals = instances[:,i].transpose().toarray()[0]
        try:
            corr,p = stats.pearsonr(feature_vals,labels)
            if math.isnan(corr):
                corr = 0
        except:
            corr = 0
        feature_correlation.append([i,abs(corr),corr,p])
    sorted_feature_correlation = sorted(feature_correlation,key=lambda k : k[1],reverse=True)
    return sorted_feature_correlation
Project: quoll | Author: LanguageMachines | Project source | File source
def calculate_feature_correlation(instances):
    # calculate correlation by feature
    feature_correlation = []
    for i in range(instances.shape[1]):
        feature_vals_i = instances[:,i].transpose().toarray()[0]
        for j in range(i+1,instances.shape[1]):
            feature_vals_j = instances[:,j].transpose().toarray()[0]
            try:
                corr,p = stats.pearsonr(feature_vals_i,feature_vals_j)
                if math.isnan(corr):
                    corr = 0
            except:
                corr = 0
            feature_correlation.append([i,j,abs(corr),corr,p])
    return feature_correlation
Project: MP-CNN-Variants | Author: tuzhucheng | Project source | File source
def get_scores(self):
        self.model.eval()
        num_classes = self.dataset_cls.NUM_CLASSES
        predict_classes = torch.arange(1, num_classes + 1).expand(self.batch_size, num_classes)
        test_kl_div_loss = 0
        predictions = []
        true_labels = []

        for batch in self.data_loader:
            output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
            test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
            # handle last batch which might have smaller size
            if len(predict_classes) != len(batch.sentence_1):
                predict_classes = torch.arange(1, num_classes + 1).expand(len(batch.sentence_1), num_classes)

            if self.data_loader.device != -1:
                with torch.cuda.device(self.device):
                    predict_classes = predict_classes.cuda()

            true_labels.append((predict_classes * batch.label.data).sum(dim=1))
            predictions.append((predict_classes * output.data.exp()).sum(dim=1))

            del output

        predictions = torch.cat(predictions).cpu().numpy()
        true_labels = torch.cat(true_labels).cpu().numpy()
        test_kl_div_loss /= len(batch.dataset.examples)
        pearson_r = pearsonr(predictions, true_labels)[0]
        spearman_r = spearmanr(predictions, true_labels)[0]

        return [pearson_r, spearman_r, test_kl_div_loss], ['pearson_r', 'spearman_r', 'KL-divergence loss']
Project: MP-CNN-Variants | Author: tuzhucheng | Project source | File source
def get_scores(self):
        self.model.eval()
        num_classes = self.dataset_cls.NUM_CLASSES
        predict_classes = torch.arange(0, num_classes).expand(self.batch_size, num_classes)
        test_kl_div_loss = 0
        predictions = []
        true_labels = []

        for batch in self.data_loader:
            output = self.model(batch.sentence_1, batch.sentence_2, batch.ext_feats)
            test_kl_div_loss += F.kl_div(output, batch.label, size_average=False).data[0]
            # handle last batch which might have smaller size
            if len(predict_classes) != len(batch.sentence_1):
                predict_classes = torch.arange(0, num_classes).expand(len(batch.sentence_1), num_classes)

            if self.data_loader.device != -1:
                with torch.cuda.device(self.device):
                    predict_classes = predict_classes.cuda()

            true_labels.append((predict_classes * batch.label.data).sum(dim=1))
            predictions.append((predict_classes * output.data.exp()).sum(dim=1))

            del output

        predictions = torch.cat(predictions).cpu().numpy()
        true_labels = torch.cat(true_labels).cpu().numpy()
        test_kl_div_loss /= len(batch.dataset.examples)
        pearson_r = pearsonr(predictions, true_labels)[0]

        return [pearson_r, test_kl_div_loss], ['pearson_r', 'KL-divergence loss']
Project: aes | Author: feidong1991 | Project source | File source
def pearson(y_true, y_pred):
    """
    Calculate Pearson product-moment correlation coefficient between ``y_true``
    and ``y_pred``.

    :param y_true: The true/actual/gold labels for the data.
    :type y_true: array-like of float
    :param y_pred: The predicted/observed labels for the data.
    :type y_pred: array-like of float

    :returns: Pearson product-moment correlation coefficient if well-defined,
              else 0
    """
    ret_score = pearsonr(y_true, y_pred)[0]
    return ret_score if not np.isnan(ret_score) else 0.0
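The NaN fallback matters because pearsonr is undefined for constant input; SciPy returns nan in that case (with a warning in recent versions). A minimal sketch:

import numpy as np
from scipy.stats import pearsonr

r, _ = pearsonr([1.0, 1.0, 1.0], [1.0, 2.0, 3.0])  # constant first argument
print(np.isnan(r))  # True; the wrapper above maps this case to 0.0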
Project: LDA_RecEngine | Author: easonchan1213 | Project source | File source
def pearson_correlation(a,b,topics):
    from scipy.stats import pearsonr
    a = fill_list_from_dict(a,topics)
    b = fill_list_from_dict(b,topics)
    return pearsonr(a,b)[0]
Project: brainiak | Author: brainiak | Project source | File source
def test_phase_randomize():
    from brainiak.utils.utils import phase_randomize
    import numpy as np
    from scipy.fftpack import fft
    import math
    from scipy.stats import pearsonr

    # Generate auto-correlated signals
    nv = 2
    T = 100
    ns = 3
    D = np.zeros((nv, T, ns))
    for v in range(nv):
        for s in range(ns):
            D[v, :, s] = np.sin(np.linspace(0, math.pi * 5 * (v + 1), T)) + \
                         np.sin(np.linspace(0, math.pi * 6 * (s + 1), T))

    freq = fft(D, axis=1)
    D_pr = phase_randomize(D)
    freq_pr = fft(D_pr, axis=1)
    p_corr = pearsonr(np.angle(freq).flatten(), np.angle(freq_pr).flatten())[0]

    assert np.isclose(abs(freq), abs(freq_pr)).all(), \
        "Amplitude spectrum not preserved under phase randomization"

    assert abs(p_corr) < 0.03, \
        "Phases still correlated after randomization"
Project: IDNNs | Author: ravidziv | Project source | File source
def plot_pearson(name):
    """Plot the pearsin coeff of  the neurons for each layer"""
    data_array = utils.get_data(name)
    ws = data_array['weights']
    f = plt.figure(figsize=(12, 8))
    axes = f.add_subplot(111)
    #The number of neurons in each layer -
    #TODO need to change it to be auto
    sizes =[10,7, 5, 4,3,2 ]
    #The mean of pearson coeffs of all the layers
    pearson_mean =[]
    #Go over all the layers
    for layer in range(len(sizes)):
        inner_pearson_mean =[]
        #Go over all the weights in the layer
        for k in range(len(ws)):
            ws_current = np.squeeze(ws[k][0][0][-1])
            #Go over the neurons
            for neuron in range(len(ws_current[layer])):
                pearson_t = []
                #Go over the rest of the neurons
                for neuron_second in range(neuron+1, len(ws_current[layer])):
                    pearson_c, p_val = sis.pearsonr(ws_current[layer][neuron], ws_current[layer][neuron_second])
                    pearson_t.append(pearson_c)
            inner_pearson_mean.append(np.mean(pearson_t))
        pearson_mean.append(np.mean(inner_pearson_mean))
    #Plot the coeff
    axes.bar(np.arange(1,7), np.abs(np.array(pearson_mean))*np.sqrt(sizes), align='center')
    axes.set_xlabel('Layer')
    axes.set_ylabel('Abs(Pearson)*sqrt(N_i)')
    rects = axes.patches
    # Now make some labels
    labels = ["L%d (%d nuerons)" % (i, j) for i, j in zip(range(len(rects)), sizes)]
    plt.xticks(np.arange(1,7), labels)
Project: brainpipe | Author: EtienneCmb | Project source | File source
def circ_corrcc(alpha, x):
    """Correlation coefficient between one circular and one linear random
    variable.

    Args:
        alpha: vector
            Sample of angles in radians

        x: vector
            Sample of linear random variable

    Returns:
        rho: float
            Correlation coefficient

        pval: float
            p-value

    Code taken from the Circular Statistics Toolbox for Matlab
    By Philipp Berens, 2009
    Python adaptation by Etienne Combrisson
    """
    if len(alpha) != len(x):
        raise ValueError('The length of alpha and x must be the same')
    n = len(alpha)

    # Compute correlation coefficent for sin and cos independently
    rxs = pearsonr(x,np.sin(alpha))[0]
    rxc = pearsonr(x,np.cos(alpha))[0]
    rcs = pearsonr(np.sin(alpha),np.cos(alpha))[0]

    # Compute angular-linear correlation (equ. 27.47)
    rho = np.sqrt((rxc**2 + rxs**2 - 2*rxc*rxs*rcs)/(1-rcs**2))

    # Compute pvalue
    pval = 1 - chi2.cdf(n*rho**2, 2)

    return rho, pval
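A short usage sketch with synthetic data (assumes numpy as np plus the pearsonr and chi2 imports the function body uses):

import numpy as np

rng = np.random.RandomState(0)
alpha = rng.uniform(0, 2 * np.pi, 200)       # circular sample, in radians
x = np.sin(alpha) + 0.1 * rng.randn(200)     # linear variable driven by alpha
rho, pval = circ_corrcc(alpha, x)
print(rho, pval)  # rho near 1, pval near 0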
Project: CS-SMAF | Author: brian-cleary | Project source | File source
def compare_distances(A,B,random_samples=[],s=200,pvalues=False):
    if len(random_samples) == 0:
        random_samples = np.zeros(A.shape[1],dtype=np.bool)
        random_samples[:min(s,A.shape[1])] = True
        np.random.shuffle(random_samples)
    dist_x = distance.pdist(A[:,random_samples].T,'euclidean')
    dist_y = distance.pdist(B[:,random_samples].T,'euclidean')
    pear = pearsonr(dist_x,dist_y)
    spear = spearmanr(dist_x,dist_y)
    if pvalues:
        return pear,spear
    else:
        return pear[0],spear[0]
Project: DeepLearn | Author: GauravBh1010tt | Project source | File source
def sum_corr(view1,view2,flag=''):

    print("test correlation")
    corr = 0
    for i,j in zip(view1,view2):
        corr += measures.pearsonr(i,j)[0]
    print('avg sum corr ::',flag,'::',corr/len(view1))
Project: DeepLearn | Author: GauravBh1010tt | Project source | File source
def cal_sim(model,ind1,ind2=1999):
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    label1 = np.load('test_l.npy')
    x1 = project(model,[view1,np.zeros_like(view1)])
    x2 = project(model,[np.zeros_like(view2),view2])
    label2 = []
    count = 0
    MAP=0
    for i,j in enumerate(x1):
        cor = []
        AP=0
        for y in x2:
            temp1 = j.tolist()
            temp2 = y.tolist()
            cor.append(pearsonr(temp1,temp2))
        #if i == np.argmax(cor):
        #    count+=1
        #val=[(q,(i*ind1+p))for p,q in enumerate(cor)]
        val=[(q,p)for p,q in enumerate(cor)]
        val.sort()
        val.reverse()
        label2.append(val[0:4])
        t = [w[1]for w in val[0:7]]
        #print t
        for x,y in enumerate(t):
            if y in range(i,i+5):
                AP+=1/(x+1)
        print(t)
        print(AP)
        MAP+=AP
    #print 'accuracy  :- ',float(count)*100/ind1,'%'
    print('MAP is : ',MAP/ind1)
Project: wordsim | Author: recski | Project source | File source
def pearson_scorer(estimator, X, y):
    logging.info('predicting ...')
    predicted = estimator.predict(X)
    return pearsonr(list(predicted), y)[0]  # a scorer must return a single number
Project: aes-gated-word-char | Author: unkn0wnxx | Project source | File source
def calc_correl(self, dev_pred, test_pred):
        dev_prs, _ = pearsonr(dev_pred, self.dev_y_org)
        test_prs, _ = pearsonr(test_pred, self.test_y_org)
        dev_spr, _ = spearmanr(dev_pred, self.dev_y_org)
        test_spr, _ = spearmanr(test_pred, self.test_y_org)
        dev_tau, _ = kendalltau(dev_pred, self.dev_y_org)
        test_tau, _ = kendalltau(test_pred, self.test_y_org)
        return dev_prs, test_prs, dev_spr, test_spr, dev_tau, test_tau
Project: simec | Author: cod3licious | Project source | File source
def check_similarity_match(X_embed, S):
    """
    Since SimEcs are supposed to project the data into an embedding space where the target similarities
    can be linearly approximated, check if X_embed*X_embed^T = S
    (check mean squared error and Spearman correlation coefficient)
    Inputs:
        - X_embed: Nxd matrix with coordinates in the embedding space
        - S: NxN matrix with target similarities (do whatever transformations were done before using this
             as input to the SimEc, e.g. centering, etc.)
    Returns:
        - msqe, rho, r: mean squared error, Spearman and Pearson correlation coefficient between linear kernel of embedding
                       and target similarities (mean squared error is more exact, corrcoef a more relaxed error measure)
    """
    # compute linear kernel as approximated similarities
    S_approx = X_embed.dot(X_embed.T)
    # to get results that are comparable across similarity measures, we have to normalize them somehow,
    # in this case by dividing by the absolute max value of the target similarity matrix
    n = np.max(np.abs(S))
    S_norm = S/n
    S_approx /= n
    # compute mean squared error
    msqe = np.mean((S_norm - S_approx) ** 2)
    # compute Spearman correlation coefficient
    rho = spearmanr(S_norm.flatten(), S_approx.flatten())[0]
    # compute Pearson correlation coefficient
    r = pearsonr(S_norm.flatten(), S_approx.flatten())[0]
    return msqe, rho, r
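A quick self-check, constructing an embedding whose linear kernel is the target similarity exactly, so the error should be ~0 and both correlations ~1 (assumes numpy as np and the spearmanr/pearsonr imports the function uses):

import numpy as np

X_embed = np.random.RandomState(42).randn(50, 5)
S = X_embed.dot(X_embed.T)  # target similarities realized exactly
msqe, rho, r = check_similarity_match(X_embed, S)
print(msqe, rho, r)  # ~0.0, ~1.0, ~1.0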
Project: microTC | Author: INGEOTEC | Project source | File source
def compute_score(self, conf, hy):
        conf['_r2'] = r2_score(self.test_y, hy)
        conf['_spearmanr'] = spearmanr(self.test_y, hy)[0]
        conf['_pearsonr'] = pearsonr(self.test_y, hy)[0]
        conf['_score'] = conf['_' + self.score]
        # print(conf)
Project: tensorflow-cnn-time-series | Author: philipperemy | Project source | File source
def generate_two_correlated_time_series(size, rho):
    num_samples = size
    num_variables = 2
    cov = [[1.0, rho], [rho, 1.0]]

    L = np.linalg.cholesky(cov)

    uncorrelated = np.random.standard_normal((num_variables, num_samples))
    correlated = np.dot(L, uncorrelated)
    x, y = correlated
    rho, p_val = stats.pearsonr(x, y)
    return x, y, rho
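A short check that the Cholesky construction hits the requested correlation (assumes the numpy/scipy imports the snippet relies on); the returned rho is the empirical Pearson r of the generated pair:

x, y, rho_hat = generate_two_correlated_time_series(10000, 0.8)
print(rho_hat)  # close to 0.8 for large samples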
Project: Building-Machine-Learning-Systems-With-Python-Second-Edition | Author: PacktPublishing | Project source | File source
def _plot_correlation_func(x, y):

    r, p = pearsonr(x, y)
    title = "Cor($X_1$, $X_2$) = %.3f" % r
    pylab.scatter(x, y)
    pylab.title(title)
    pylab.xlabel("$X_1$")
    pylab.ylabel("$X_2$")

    f1 = scipy.poly1d(scipy.polyfit(x, y, 1))
    pylab.plot(x, f1(x), "r--", linewidth=2)
    # pylab.xticks([w*7*24 for w in [0,1,2,3,4]], ['week %i'%(w+1) for w in
    # [0,1,2,3,4]])
Project: kerpy | Author: oxmlcs | Project source | File source
def SubCorr_statistic(self,data_x=None,data_y=None):
        if data_x is None:
            data_x=self.data_x
        if data_y is None:
            data_y=self.data_y
        dx = shape(data_x)[1]
        stats_value = zeros(dx)
        for dd in range(dx):
            stats_value[dd] = pearsonr(data_x[:,[dd]],data_y)[0]**2
        SubCorr = sum(stats_value)/float(dx)
        return SubCorr
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia | Project source | File source
def test_corr(self):
        tm._skip_if_no_scipy()

        import scipy.stats as stats

        # full overlap
        self.assertAlmostEqual(self.ts.corr(self.ts), 1)

        # partial overlap
        self.assertAlmostEqual(self.ts[:15].corr(self.ts[5:]), 1)

        self.assertTrue(isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)))

        ts1 = self.ts[:15].reindex(self.ts.index)
        ts2 = self.ts[5:].reindex(self.ts.index)
        self.assertTrue(isnull(ts1.corr(ts2, min_periods=12)))

        # No overlap
        self.assertTrue(np.isnan(self.ts[::2].corr(self.ts[1::2])))

        # all NA
        cp = self.ts[:10].copy()
        cp[:] = np.nan
        self.assertTrue(isnull(cp.corr(cp)))

        A = tm.makeTimeSeries()
        B = tm.makeTimeSeries()
        result = A.corr(B)
        expected, _ = stats.pearsonr(A, B)
        self.assertAlmostEqual(result, expected)
Project: NLPWorks | Author: thautwarm | Project source | File source
def de_ps(X,y):
    dim = X.shape[1]
    de = min(2000,dim)
    clf = SelectKBest(lambda X, Y: np.array([pearsonr(x, Y) for x in X.T]).T, k=de)  # list comprehension: map() is lazy in Python 3
    clf.fit(X,y)
    def _func(X1,X2):
        return clf.transform(X1),clf.transform(X2)
    return _func
Project: miniMDS | Author: seqcode | Project source | File source
def pearson(mat1, mat2):
    """Root mean square error between two matrices, ignoring zeroes"""
    assert mat1.shape == mat2.shape
    #convert to vectors
    vec1 = mat1.flatten()
    vec2 = mat2.flatten()

    #remove zeroes
    nonzero = [i for i in range(len(vec1)) if vec1[i] != 0 and vec2[i] != 0]
    vec1 = vec1[nonzero]
    vec2 = vec2[nonzero]

    r, p = st.pearsonr(vec1, vec2)
    return r
Project: singularity-python | Author: singularityware | Project source | File source
def RSA(m1,m2):
    '''RSA analysis will compare the similarity of two matrices
    '''
    from scipy.stats import pearsonr
    import scipy.linalg
    import numpy

    # Mask the upper triangle (including the diagonal) of each matrix and flatten to a vector
    vectorm1 = m1.mask(numpy.triu(numpy.ones(m1.shape)).astype(bool)).values.flatten()
    vectorm2 = m2.mask(numpy.triu(numpy.ones(m2.shape)).astype(bool)).values.flatten()
    # Now remove the nans
    m1defined = numpy.argwhere(~numpy.isnan(numpy.array(vectorm1,dtype=float)))
    m2defined = numpy.argwhere(~numpy.isnan(numpy.array(vectorm2,dtype=float)))
    idx = numpy.intersect1d(m1defined,m2defined)
    return pearsonr(vectorm1[idx],vectorm2[idx])[0]
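A small usage sketch with two nearly identical symmetric matrices (hypothetical data; RSA compares only the lower triangles, so the result should be close to 1):

import numpy
import pandas

rng = numpy.random.RandomState(0)
a = rng.rand(4, 4)
m1 = pandas.DataFrame((a + a.T) / 2)                      # symmetric matrix
m2 = pandas.DataFrame(m1.values + 0.01 * rng.rand(4, 4))  # small perturbation
print(RSA(m1, m2))  # close to 1.0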
Project: RankIQA | Author: xialeiliu | Project source | File source
def forward(self, bottom, top):
        """Compute the SROCC and LCC and output them to top."""
        #ipdb.set_trace()
        testPreds = bottom[0].data
        testPreds = np.reshape(testPreds,testPreds.shape[0])
        testLabels = bottom[1].data
        testLabels = np.reshape(testLabels,testLabels.shape[0])
        top[0].data[...] = stats.spearmanr(testPreds, testLabels)[0]
        top[1].data[...] = stats.pearsonr(testPreds, testLabels)[0]