# Python sklearn.metrics module: precision_recall_curve() example source code


def threshold_estimate(x,y):
    """Estimate an F1-maximising probability threshold on one hold-out split.

    Splits ``(x, y)`` 90/10, trains an XGBoost classifier on the larger part,
    scores the smaller part and returns the threshold with the highest F1.

    :param x: feature matrix.
    :param y: array of 0/1 labels (boolean-mask indexable).
    :return: the selected threshold taken from ``precision_recall_curve``.
    """
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(x, y, test_size=0.1, random_state=0)
    # Negative-to-positive ratio; it is only printed, the sample weights stay uniform.
    weight = float(len(y_train[y_train == 0]))/float(len(y_train[y_train == 1]))
    w1 = np.array([1]*y_train.shape[0])
    print("samples: %d %d %f" % (x_train.shape[0], x_test.shape[0], weight))
    estimator = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
    # NOTE(review): the original line was truncated to
    # "...nthread=50), y_train, sample_weight=w1)"; the fit call is restored here.
    estimator.fit(x_train, y_train, sample_weight=w1)
    y_scores = estimator.predict_proba(x_test)[:,1]
    precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
    # The first two curve points are skipped, hence the matching "2 +" offset
    # when indexing ``thresholds`` below.
    # NOTE(review): reason for skipping them is not visible here -- confirm.
    f1 = 2*precision[2:]*recall[2:]/(precision[2:]+recall[2:])
    m_idx = np.argmax(f1)
    m_thresh = thresholds[2+m_idx]
    print("%d %f %f" % (precision.shape[0], f1[m_idx], m_thresh))
    return m_thresh

# Estimate threshold for the classifier using inner-round cross validation
def _update_tsg_metrics(self, y_true, y_pred, prob):
        """Store TSG predictions/scores and this iteration's TSG metrics.

        Writes the per-gene prediction and score series, then fills row
        ``self.num_pred`` of the count, precision, recall, F1, interpolated
        TPR and interpolated precision containers.
        """
        gene_index = self.y.index
        self.tsg_gene_pred = pd.Series(y_pred, gene_index)
        self.tsg_gene_score = pd.Series(prob, gene_index)

        # classification summary for the current iteration
        iteration = self.num_pred
        self.tsg_gene_count[iteration] = sum(y_pred)
        class_prec, class_recall, class_f1, _ = metrics.precision_recall_fscore_support(y_true, y_pred)
        tsg_col = 1  # index of the TSG class in the per-class metric arrays
        self.tsg_precision[iteration] = class_prec[tsg_col]
        self.tsg_recall[iteration] = class_recall[tsg_col]
        self.tsg_f1_score[iteration] = class_f1[tsg_col]
        self.logger.debug('Tsg Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                          iteration + 1, str(class_prec), str(class_recall), str(class_f1)))

        # ROC curve resampled onto the shared FPR grid
        fpr, tpr, _ = metrics.roc_curve(y_true, prob)
        self.tsg_tpr_array[iteration, :] = interp(self.tsg_fpr_array, fpr, tpr)

        # PR curve resampled onto the shared recall grid; the curve is
        # reversed so recall is ascending for interpolation
        curve_p, curve_r, _ = metrics.precision_recall_curve(y_true, prob)
        self.tsg_precision_array[iteration, :] = interp(self.tsg_recall_array, curve_r[::-1], curve_p[::-1])
def plot_PR_by_class(y_pred, y_true, classes, out_path):
    """Plot a precision-recall curve per class and choose a threshold for each.

    :param y_pred: 2-D array of scores, indexed ``[:, column]`` per class.
    :param y_true: 2-D array of labels with the same column layout.
    :param classes: mapping ``class_name -> column index``.
    :param out_path: not used by the visible body.
    :return: dict ``class_name -> threshold`` with the highest J statistic.
    """
    best_thresh = {}
    for class_name, c in classes.items():  # for each class

        # Compute precision-recall curve and its area
        precision, recall, thresholds = precision_recall_curve(y_true[:, c], y_pred[:, c])
        pr_auc = auc(recall, precision)

        # Plot PR curve
        plt.plot(recall, precision, label='{}, AUC = {:.3f}'.format(class_name, pr_auc))

        # Calculate J statistic at every candidate threshold
        # NOTE(review): j_statistic receives the full matrices rather than
        # column ``c`` -- confirm against its definition.
        J = [j_statistic(y_true, y_pred, t) for t in thresholds]
        j_best = np.argmax(J)

        # Store the best *threshold* (previously the J value itself was stored)
        best_thresh[class_name] = thresholds[j_best]

    return best_thresh
def cv(feature_dict, feature, polarity, folds):
    """Run k-fold logistic regression and return a precision-recall curve.

    NOTE(review): this block is not valid Python as it stands; see the notes
    on the individual lines below.
    """
    kfold = KFold(len(polarity), n_folds = folds)
    # Running totals; in the visible code only ``count`` is ever updated.
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        # NOTE(review): the next line looks truncated -- the trailing ", (y))"
        # is presumably the tail of a lost ``LR.fit(<features>, (y))`` call.
        # Confirm against the upstream source before repairing.
        y = [(polarity[i])for i in train], (y))

        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            # -1 marks a query whose column count differs from len(feature_dict).
            result = -1 if query.shape[1] != len(feature_dict) else predict(LR, query)
        # NOTE(review): ``test_label`` is never appended to in the visible loop,
        # so it is empty here -- presumably an append of ``result`` was lost.
        pre, rec, thr = precision_recall_curve(answer_label, test_label)
        # Returns inside the fold loop, so only the first fold is evaluated.
        return pre, rec, thr
    # Reached only when ``kfold`` yields no folds.
    return accuracy, precision, recall, f1
def sklearn_purity_completeness(score_export):
    """Draw purity (precision) against completeness (recall).

    ``score_export.roc()`` is expected to yield ``(gold, probability)`` pairs;
    they are unzipped into two arrays and passed to ``precision_recall_curve``.
    """
    gold_labels, probabilities = zip(*score_export.roc())
    gold_labels = np.array(gold_labels)
    probabilities = np.array(probabilities)

    purity, completeness, _ = precision_recall_curve(gold_labels, probabilities)

    plt.plot(
        completeness,
        purity,
        lw=2,
        color='navy',
        label='Precision-Recall curve',
    )
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.legend(loc="lower left")
def test_precision_recall_curve():
    """Exercise ``precision_recall_curve`` on generated and hand-made inputs."""
    y_true, _, probas_pred = make_prediction(binary=True)
    _test_precision_recall_curve(y_true, probas_pred)

    # Switch to {-1, 1} labels and verify the input array is left untouched
    y_true[np.where(y_true == 0)] = -1
    snapshot = y_true.copy()
    _test_precision_recall_curve(y_true, probas_pred)
    assert_array_equal(snapshot, y_true)

    # Small hand-checked example
    labels = [1, 0, 0, 1]
    predict_probas = [1, 2, 3, 4]
    expected_p = np.array([0.5, 0.33333333, 0.5, 1., 1.])
    expected_r = np.array([1., 0.5, 0.5, 0.5, 0.])
    expected_t = np.array([1, 2, 3, 4])
    p, r, t = precision_recall_curve(labels, predict_probas)
    assert_array_almost_equal(p, expected_p)
    assert_array_almost_equal(r, expected_r)
    assert_array_almost_equal(t, expected_t)
    assert_equal(p.size, r.size)
    assert_equal(p.size, t.size + 1)
def _test_precision_recall_curve(y_true, probas_pred):
    """Check area and size invariants of ``precision_recall_curve``.

    :param y_true: true binary labels.
    :param probas_pred: predicted scores for the positive class.
    """
    # Test Precision-Recall and area under PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.85, 2)
    # NOTE(review): the first line of this call was lost; restored from the
    # surviving continuation line.
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, 1)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    # NOTE(review): the second argument was lost; an all-constant score
    # vector matches the comment above -- confirm against upstream.
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    precision_recall_auc = auc(r, p)
    assert_array_almost_equal(precision_recall_auc, 0.75, 3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
def drawGraphsPeriod(data, start, end, date):
    """Plot precision-recall curves for rows ``start:end`` of ``data``.

    :param data: table indexed by column name; the ``'p<i>_Fraud'`` and
        ``'CLASS'`` columns are read.
    :param start: start of the row slice.
    :param end: end of the row slice (used as ``[start:end]``).
    :param date: date object; the title shows the day before it.

    NOTE(review): the original parameter descriptions were lost to an
    encoding error (rendered as '?'); the ones above are derived from the code.
    """
    # range() iterates identically to the original xrange() on Python 2 and 3.
    for i in range(3, 4):
        column = 'p' + str(i) + '_Fraud'
        actual, predictions = getData(list(data[column][start:end]), list(data['CLASS'][start:end]))

        precision, recall, thresholds = precision_recall_curve(actual, predictions)

        plt.plot(recall, precision, label='%s PRC' % (column))

    plt.title('Precision-recall curve for ' + str((date - datetime.timedelta(days=1)).strftime('%Y/%m/%d')))
    plt.legend(loc='lower right', fontsize='small')
def multilabel_precision_recall(y_score, y_test, clf_target_ids, clf_target_names):
    """Compute per-class and averaged precision-recall data.

    :param y_score: 2-D score array with one column per entry of
        ``clf_target_ids``.
    :param y_test: true target ids.
    :param clf_target_ids: target ids in classifier column order.
    :param clf_target_names: names aligned with ``clf_target_ids``.
    :return: ``(precision, recall, average_precision)`` dicts keyed by target
        name, plus ``"average"`` (curves) and ``"micro"``/``"macro"`` (scores).
    """
    from sklearn.metrics import precision_recall_curve
    from sklearn.metrics import average_precision_score
    from sklearn.preprocessing import label_binarize

    # Compute Precision-Recall and plot curve
    precision = dict()
    recall = dict()
    average_precision = dict()

    # Find indices that have non-zero detections
    clf_target_map = { k: v for k,v in zip(clf_target_ids, clf_target_names)}
    id2ind = {tid: idx for (idx,tid) in enumerate(clf_target_ids)}

    # Only handle the targets encountered
    unique = np.unique(y_test)
    nzinds = np.int64([id2ind[target] for target in unique])

    # Binarize and create precision-recall curves
    y_test_multi = label_binarize(y_test, classes=unique)
    for i,target in enumerate(unique):
        index = id2ind[target]
        name = clf_target_map[target]
        precision[name], recall[name], _ = precision_recall_curve(y_test_multi[:, i],
                                                                  y_score[:, index])
        average_precision[name] = average_precision_score(y_test_multi[:, i], y_score[:, index])

    # Compute the pooled (micro-style) precision-recall curve and averaged scores.
    # NOTE(review): the continuation lines of the next three calls were lost;
    # the restored arguments follow the dict keys and the shape of
    # y_test_multi -- confirm against upstream.
    encountered_scores = y_score[:, nzinds]
    precision["average"], recall["average"], _ = precision_recall_curve(y_test_multi.ravel(),
                                                                        encountered_scores.ravel())
    average_precision["micro"] = average_precision_score(y_test_multi, encountered_scores,
                                                         average="micro")
    average_precision["macro"] = average_precision_score(y_test_multi, encountered_scores,
                                                         average="macro")
    return precision, recall, average_precision
def plot_precision_recall(indir, gts_file, outdir):
    """Plot one precision-recall curve per ``.csv`` prediction file in ``indir``.

    :param indir: directory holding prediction files.
    :param gts_file: ground-truth file read with ``read_item_tag``.
    :param outdir: prefix to which ``"precision_recall.png"`` is appended.
    """
    groundtruths = read_item_tag(gts_file)

    indir = utils.abs_path_dir(indir)
    for item in os.listdir(indir):
        if ".csv" in item:
            isrcs = read_preds(indir + "/" + item)
            test_groundtruths = []
            predictions = []
            for isrc in isrcs:
                if isrc in groundtruths:
                    # NOTE(review): the body of this ``if`` was missing. The
                    # reconstruction assumes read_preds returns a mapping
                    # id -> score and read_item_tag a mapping id -> tag --
                    # TODO confirm against those helpers.
                    test_groundtruths.append(groundtruths[isrc])
                    predictions.append(isrcs[isrc])
            # Tag "s" is treated as the positive class.
            test_groundtruths = [tag=="s" for tag in test_groundtruths]
            precision, recall, _ = precision_recall_curve(test_groundtruths, predictions)
            # item[:-4] drops the 4-character file extension for the legend label.
            plt.plot(recall, precision, label=item[:-4] + " (" + str(round(average_precision_score(test_groundtruths, predictions), 3)) + ")")

    plt.ylim([0.0, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.title('Precision-Recall curve for Algo (AUC)')
    plt.savefig(outdir + "precision_recall.png", dpi=200, bbox_inches="tight")
    utils.print_success("Precision-Recall curve created in " + outdir)
def plot_pr(gold, predicted_prob, lb):
    """Add a precision-recall curve for class 1 to the current figure.

    :param gold: true binary labels.
    :param predicted_prob: 2-D probability array; column 1 is used.
    :param lb: legend label prefix; the average precision is appended.
    """
    pp1 = predicted_prob[:,1] # prob for class 1
    p, r, th = precision_recall_curve(gold, pp1)
    ap = average_precision_score(gold, pp1)
    # The ``.format(ap)`` tail of this call had been lost, leaving the
    # parenthesis unclosed and ``ap`` unused.
    plt.plot(r, p, label= lb + ' (area = {0:0.2f})'
             ''.format(ap))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title('Precision and Recall')
    plt.legend(loc="upper right")
def Precision(clf):
    """Print accuracy figures and a classification report for ``clf``.

    Reads the module-level ``x_test`` / ``y_test``.
    """
    from sklearn.metrics import accuracy_score

    predicted_labels = clf.predict(x_test)
    # Fraction of test samples predicted correctly
    print(np.mean(predicted_labels == y_test))
    # Curve values are computed but not used further in this function
    precision, recall, thresholds = precision_recall_curve(y_test, clf.predict(x_test))
    positive_proba = clf.predict_proba(x_test)[:,1]
    # Binarise the positive-class probability at 0.5 for the report
    binarised = positive_proba > 0.5
    print(classification_report(y_test, binarised, target_names = ['neg', 'pos']))
    print('???: %.2f' % accuracy_score(y_test, predicted_labels))
# Project: bionlp17    Author: leebird    | project source | file source
def generate_prec_recall_points(clf, test_examples, test_labels, pk_file):
    """Generate precision-recall points and optionally pickle them.

    :param clf: wrapper exposing ``model.classes_`` and ``predict_raw_prob``.
    :param test_examples: examples passed to ``clf.predict_raw_prob``.
    :param test_labels: true labels, binarised against ``clf.model.classes_``.
    :param pk_file: output path, or ``None`` to skip writing.
    """
    precision = dict()
    recall = dict()
    average_precision = dict()
    thresholds = dict()

    n_classes = len(clf.model.classes_)
    # ``classes`` is passed by keyword: newer scikit-learn releases reject it
    # positionally, and older ones accept the keyword form too.
    y_test = label_binarize(test_labels, classes=clf.model.classes_)

    y_score = clf.predict_raw_prob(test_examples)
    # It only output 1 column of positive probability.
    y_score = y_score[:, 1:]

    for i in range(n_classes - 1):
        precision[i], recall[i], thresholds[i] = precision_recall_curve(
            y_test[:, i],
            y_score[:, i])
        average_precision[i] = average_precision_score(y_test[:, i],
                                                       y_score[:, i])
    # Compute micro-average precision-recall curve and average precision
    precision["micro"], recall["micro"], thresholds['micro'] = \
        precision_recall_curve(y_test.ravel(), y_score.ravel())
    # The closing ``average="micro")`` of this call had been lost.
    average_precision["micro"] = average_precision_score(y_test, y_score,
                                                         average="micro")

    if pk_file is not None:
        with open(pk_file, 'wb') as f:
            pickle.dump((precision, recall, average_precision, thresholds), f)
def calc_pr_metrics(truth_df, score_df):
    """Resample a precision-recall curve onto 100 evenly spaced recall values.

    :param truth_df: true binary labels.
    :param score_df: predicted scores.
    :return: ``(precision_array, recall_array, pr_auc)`` where ``pr_auc`` is
        the area under the resampled curve.
    """
    recall_array = np.linspace(0, 1, 100)
    p, r, _ = metrics.precision_recall_curve(truth_df, score_df)
    # Reverse so recall is ascending, as required for interpolation.
    # The interpolated threshold array computed previously was never used
    # or returned, so it is no longer built.
    precision_array = interp(recall_array, r[::-1], p[::-1])
    pr_auc = metrics.auc(recall_array, precision_array)
    return precision_array, recall_array, pr_auc
def calc_pr_metrics(truth_df, score_df):
    """Resample a precision-recall curve onto 100 evenly spaced recall values.

    :param truth_df: true binary labels.
    :param score_df: predicted scores.
    :return: ``(precision_array, recall_array, pr_auc)`` where ``pr_auc`` is
        the area under the resampled curve.
    """
    recall_array = np.linspace(0, 1, 100)
    p, r, _ = metrics.precision_recall_curve(truth_df, score_df)
    # Reverse so recall is ascending, as required for interpolation.
    # The interpolated threshold array computed previously was never used
    # or returned, so it is no longer built.
    precision_array = interp(recall_array, r[::-1], p[::-1])
    pr_auc = metrics.auc(recall_array, precision_array)
    return precision_array, recall_array, pr_auc
def _update_metrics(self, y_true, y_pred,
                        onco_prob, tsg_prob):
        """Record driver-gene predictions and this iteration's metrics.

        The driver score is ``onco_prob + tsg_prob``; a gene counts as a
        predicted cancer gene when that sum exceeds 0.5, and as a true driver
        when ``y_true > 0``.
        """
        # record which genes were predicted what
        self.driver_gene_pred = pd.Series(y_pred, self.y.index)
        self.driver_gene_score = pd.Series(onco_prob+tsg_prob, self.y.index)

        # evaluate performance
        # NOTE(review): the continuation of this call was lost. An averaging
        # mode is needed because the results are stored as single values
        # below; 'macro' is presumed -- confirm against upstream.
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred,
                                                                                average='macro')
        cancer_gene_pred = ((onco_prob + tsg_prob)>.5).astype(int)
        self.cancer_gene_count[self.num_pred] = np.sum(cancer_gene_pred)
        self.precision[self.num_pred] = prec
        self.recall[self.num_pred] = recall
        self.f1_score[self.num_pred] = fscore

        # compute Precision-Recall curve metrics
        driver_prob = onco_prob + tsg_prob
        driver_true = (y_true > 0).astype(int)
        p, r, thresh = metrics.precision_recall_curve(driver_true, driver_prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        # thresholds has one fewer entry than p/r; pad so the arrays align
        thresh = np.insert(thresh, 0, 1.0)
        self.driver_precision_array[self.num_pred, :] = interp(self.driver_recall_array, r, p)
        self.driver_threshold_array[self.num_pred, :] = interp(self.driver_recall_array, r, thresh)

        # calculate prediction summary statistics
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(driver_true, cancer_gene_pred)
        self.driver_precision[self.num_pred] = prec[1]
        self.driver_recall[self.num_pred] = recall[1]

        # save driver metrics
        fpr, tpr, thresholds = metrics.roc_curve(driver_true, driver_prob)
        self.driver_tpr_array[self.num_pred, :] = interp(self.driver_fpr_array, fpr, tpr)
# Project: 2020plus    Author: KarchinLab    | project source | file source
def _update_onco_metrics(self, y_true, y_pred, prob):
        """Store oncogene predictions/scores and this iteration's metrics.

        Fills row ``self.num_pred`` of the count, precision, recall, F1,
        interpolated TPR, interpolated precision and interpolated threshold
        containers.
        """
        gene_index = self.y.index
        self.onco_gene_pred = pd.Series(y_pred, gene_index)
        self.onco_gene_score = pd.Series(prob, gene_index)

        # classification summary for the current iteration
        iteration = self.num_pred
        self.onco_gene_count[iteration] = sum(y_pred)
        class_prec, class_recall, class_f1, _ = metrics.precision_recall_fscore_support(y_true, y_pred)
        onco_col = self.onco_num
        self.onco_precision[iteration] = class_prec[onco_col]
        self.onco_recall[iteration] = class_recall[onco_col]
        self.onco_f1_score[iteration] = class_f1[onco_col]
        self.logger.debug('Onco Iter %d: Precission=%s, Recall=%s, f1_score=%s' % (
                          iteration + 1, str(class_prec), str(class_recall), str(class_f1)))

        # ROC curve resampled onto the shared FPR grid
        fpr, tpr, _ = metrics.roc_curve(y_true, prob)
        self.onco_tpr_array[iteration, :] = interp(self.onco_fpr_array, fpr, tpr)

        # PR curve, reversed so recall ascends; thresholds are padded with a
        # leading 1.0 so they line up with the precision/recall arrays
        curve_p, curve_r, curve_t = metrics.precision_recall_curve(y_true, prob)
        asc_p = curve_p[::-1]
        asc_r = curve_r[::-1]
        padded_t = np.insert(curve_t[::-1], 0, 1.0)
        self.onco_precision_array[iteration, :] = interp(self.onco_recall_array, asc_r, asc_p)
        self.onco_threshold_array[iteration, :] = interp(self.onco_recall_array, asc_r, padded_t)
def recall_at_precision(*args, **kwargs):
    """Return the recall at the first curve point meeting a precision target.

    :param args: positional arguments forwarded to ``precision_recall_curve``.
    :param kwargs: must contain ``metric_param`` (the required precision,
        parsed by ``_parse_number_or_fraction``); the rest is forwarded.
    :return: recall of the first point, in the order returned by
        ``precision_recall_curve``, whose precision is at least the target;
        ``None`` when no point qualifies.
    """
    from sklearn.metrics import precision_recall_curve
    metric_param = kwargs.pop('metric_param')
    required_precision = _parse_number_or_fraction(metric_param)
    precision, recall, thresholds = precision_recall_curve(*args, **kwargs)

    # Built-in zip replaces itertools.izip, which does not exist on Python 3.
    for pr, r in zip(precision, recall):
        if pr >= required_precision:
            return r
    return None
# Project: script    Author: 9468305    | project source | file source
def auc_pr(real_csv, result_csv):
    """Return the area under the precision-recall curve for two CSV inputs.

    Labels and probabilities are loaded with ``load_label_prob``.
    """
    labels, probabilities = load_label_prob(real_csv, result_csv)
    curve_precision, curve_recall, _ = metrics.precision_recall_curve(labels, probabilities)
    return metrics.auc(curve_recall, curve_precision)
def save_prcurve(prob, answer, model_name, save_fn, use_neg=True):
    """
    save prc curve

    :param prob: predicted scores, flattened before use.
    :param answer: true labels laid out like ``prob``.
    :param model_name: legend label for the curve.
    :param save_fn: output file for the figure.
    :param use_neg: when False, a reduced copy of each row is used (see the
        review note in the body).
    """
    if not use_neg:
        # NOTE(review): both loop bodies were missing. Dropping the first
        # entry of every row presumes index 0 is the negative class --
        # TODO confirm against upstream.
        prob_dn = [p[1:] for p in prob]
        ans_dn = [ans[1:] for ans in answer]
        prob = np.reshape(np.array(prob_dn), (-1))
        ans = np.reshape(np.array(ans_dn), (-1))
    else:
        # The ``else:`` separating the two flattening paths had been lost.
        prob = np.reshape(prob, (-1))
        ans = np.reshape(answer, (-1))

    precision, recall, threshold = precision_recall_curve(ans, prob)
    average_precision = average_precision_score(ans, prob)

    plt.plot(recall[:], precision[:], lw=2, color='navy', label=model_name)
    plt.title('Precision-Recall Area={0:0.2f}'.format(average_precision))
    plt.legend(loc="upper right")
    # NOTE(review): ``save_fn`` was otherwise unused; the save call is
    # presumed from the function name and docstring -- confirm.
    plt.savefig(save_fn)
def threshold_estimate_cv(x,y,k_fold):
    """Estimate an F1-maximising threshold per fold with stratified k-fold CV.

    :param x: feature matrix, row-indexable by index arrays.
    :param y: array of 0/1 labels.
    :param k_fold: number of folds.
    :return: ``(mean_threshold, per_fold_thresholds)``.
    """
    # print() with a single %-formatted string behaves the same on Python 2
    # and 3; the original print statement was Python 2 only.
    print("%d %d %d" % (y.shape[0], sum(y==1), sum(y==0)))
    kf1 = StratifiedKFold(y, n_folds=k_fold, shuffle=True, random_state=0)
    threshold = np.zeros((k_fold),dtype="float32")
    for cnt, (train_index, test_index) in enumerate(kf1):
        x_train, x_test = x[train_index], x[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Uniform sample weights (previously assigned twice; the unused
        # class-ratio computation is dropped).
        w1 = np.array([1]*y_train.shape[0])

        estimator = xgb.XGBClassifier(max_depth=10, learning_rate=0.1, n_estimators=1000, nthread=50)
        # NOTE(review): the original line was truncated to
        # "...nthread=50), y_train, sample_weight=w1)"; the fit call is restored here.
        estimator.fit(x_train, y_train, sample_weight=w1)
        y_scores = estimator.predict_proba(x_test)[:,1]
        precision, recall, thresholds = precision_recall_curve(y_test, y_scores)
        # The first two curve points are skipped, hence the "2 +" offset below.
        # NOTE(review): reason for skipping them is not visible here -- confirm.
        f1 = 2*precision[2:]*recall[2:]/(precision[2:]+recall[2:])
        m_idx = np.argmax(f1)
        threshold[cnt] = thresholds[2+m_idx]
        print("%d %f %f" % (precision.shape[0], f1[m_idx], thresholds[2+m_idx]))
    return np.mean(threshold), threshold

# Cross validation using gradient tree boosting
# Project: mappings-autogeneration    Author: dbpedia    | project source | file source
def scores(self, mdl):
        """Return ``(PR-AUC, ROC-AUC)`` of the model's scores against ``self.ys``."""
        # NOTE(review): the first two arguments had been stripped, leaving
        # "_scores(,, self.os)". ``self.ss`` / ``self.ps`` are presumed by
        # analogy with ``self.os`` -- confirm against the class definition.
        scores = mdl._scores(self.ss, self.ps, self.os)
        pr, rc, _ = precision_recall_curve(self.ys, scores)
        roc = roc_auc_score(self.ys, scores)
        return auc(rc, pr), roc
def classify(y, x, test_y, test_x):
    """Fit a depth-4 decision tree on three-way bucketed targets and print stats.

    Targets above 0.02 become class 1, below -0.02 class -1, the rest 0.
    The fitted tree's structure is published through module-level globals.

    :param y: numeric target array.
    :param x: feature matrix.
    :param test_y: overwritten with ``y`` inside the function.
    :param test_x: not used by the visible body.
    """
    global data_df, factor_name, left, right, feature, ratio, threshold
    y_c = np.zeros(len(y))
    y_c[y > 0.02] = 1
    y_c[y < -0.02] = -1
    min_n = int(0.05 * len(y))  # each leaf must hold at least 5% of the samples
    clf = DecisionTreeClassifier(max_depth=4, min_samples_leaf=min_n)
    # NOTE(review): the original line was truncated to "...min_n), y_c)";
    # ``x`` is the restored first argument since predictions are made on it below.
    clf.fit(x, y_c)
    y_p = clf.predict(x)
    fname = "D:\\Cache\\tree.txt"
    # NOTE(review): the statistics below are therefore in-sample -- confirm intent.
    test_y = y
    with open(fname, 'w') as f:
        tree.export_graphviz(clf, out_file=f)
    factor_exchange(factor_name, fname)
    left = clf.tree_.children_left
    right = clf.tree_.children_right
    feature = clf.tree_.feature
    threshold = clf.tree_.threshold
    print("mean income is:", str(np.average(test_y)),
          "\nwin ratio is: ", str(np.sum(test_y > 0) / len(test_y)))
    print("after training\n"
          "mean class_1 is: ", str(np.average(test_y[y_p > 0])),
          "\nwin ratio is: ", str(np.sum(test_y[y_p > 0] > 0) / np.sum(y_p > 0)),
          "\ntotal class_1 is:", str(np.sum(np.sum(y_p > 0))),
          "\nmean class_0 is: ", str(np.average(test_y[y_p < 0])))
def fit(self, X, y):
        """Learn the threshold on the first feature column that maximises F1.

        :param X: 2-D array; only column 0 is used.
        :param y: true binary labels.
        :return: ``self``, with ``threshold_`` set.
        """
        feature = X[:,0]
        p, r, t = precision_recall_curve(y, feature)
        # precision/recall carry one extra trailing point that has no
        # threshold; drop it so that ``argmax`` always indexes into ``t``.
        p, r = p[:-1], r[:-1]
        with np.errstate(divide='ignore', invalid='ignore'):
            f1 = np.divide(2 * np.multiply(p, r), p + r)
        # 0/0 points yield NaN; rank them below every real F1 value.
        f1[np.isnan(f1)] = -1.0
        self.threshold_ = t[f1.argmax()]
        return self
# Project: deepcpg    Author: cangermueller    | project source | file source
def get_curve_fun(name):
    """Return performance curve function by its name.

    :param name: ``'roc'`` or ``'pr'``.
    :raises ValueError: for any other name. The raise used to sit directly
        after the ``'pr'`` return, so it was unreachable and unknown names
        returned ``None``.
    """
    if name == 'roc':
        return skm.roc_curve
    elif name == 'pr':
        return skm.precision_recall_curve
    else:
        raise ValueError('Invalid performance curve "%s"!' % name)
def plot_precision_recall(y, y_pred, spacing=0.2):
    """Plot a precision-recall curve annotated with threshold values.

    :param y: true binary labels.
    :param y_pred: predicted scores.
    :param spacing: accumulated segment length after which the next
        threshold label is drawn.
    """
    precision, recall, thresholds = precision_recall_curve(y, y_pred)
    roc_auc = auc(recall, precision)  # area under the PR curve, despite the name

    plt.title('Precision vs Recall Curve', fontsize=18)
    plt.plot(recall, precision, 'b', label='AUC = %0.2f'% roc_auc)
    plt.legend(loc='lower right')
    plt.ylabel('Precision', fontsize=16)
    plt.xlabel('Recall', fontsize=16)

    acc = 0        # accumulated length since the last label
    euc = spacing  # length of the previous curve segment
    lx = 0
    ly = 0
    for idx, t in enumerate(thresholds):
        if acc >= spacing or idx == len(thresholds)-1:
            # NOTE(review): only the "'%0.2f' % t," argument line of this call
            # survived. Placing the label at the current curve point is
            # presumed, and any further styling arguments are lost -- confirm.
            plt.text(recall[idx], precision[idx],
                     '%0.2f' % t)
            acc = 0
        else:
            # The ``else:`` had been lost, leaving two bare assignments.
            acc += euc

        euc = ((recall[idx] - lx)**2 + (precision[idx] - ly)**2)**0.5
        lx = recall[idx]
        ly = precision[idx]
def scores(self, mdl):
        """Return ``(PR-AUC, ROC-AUC)`` of the model's scores against ``self.ys``."""
        # NOTE(review): the first two arguments had been stripped, leaving
        # "_scores(,, self.os)". ``self.ss`` / ``self.ps`` are presumed by
        # analogy with ``self.os`` -- confirm against the class definition.
        scores = mdl._scores(self.ss, self.ps, self.os)
        pr, rc, _ = precision_recall_curve(self.ys, scores)
        roc = roc_auc_score(self.ys, scores)
        return auc(rc, pr), roc
# Project: healthcareai-py    Author: HealthCatalyst    | project source | file source
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict: keys 'pr_auc', 'best_pr_cutoff', 'best_precision',
        'best_recall', 'precisions', 'recalls' and 'pr_thresholds'.
    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # The last curve point has no matching threshold (thresholds is one
    # element shorter), so it is excluded from the search; otherwise the
    # cutoff lookup below could raise IndexError. argmin returns the first
    # minimum, matching the previous "first of the ties" behaviour.
    pr_index = int(np.argmin(pr_distances[:-1]))
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds}
def plot_precision_recall_n(y_true, y_prob, model_name, pdf=None):
    """Plot precision and recall against the fraction of the population flagged.

    :param y_true: true binary labels.
    :param y_prob: predicted scores as a NumPy array (boolean-mask indexed).
    :param model_name: used as the figure title (see review note).
    :param pdf: optional sink for the figure (see review note).
    """
    y_score = y_prob
    precision_curve, recall_curve, pr_thresholds = precision_recall_curve(
        y_true, y_score)
    # Drop the trailing curve point, which has no threshold.
    precision_curve = precision_curve[:-1]
    recall_curve = recall_curve[:-1]
    pct_above_per_thresh = []
    number_scored = len(y_score)
    for value in pr_thresholds:
        num_above_thresh = len(y_score[y_score >= value])
        pct_above_thresh = num_above_thresh / float(number_scored)
        # The append had been lost, which left the x-axis data empty.
        pct_above_per_thresh.append(pct_above_thresh)
    pct_above_per_thresh = np.array(pct_above_per_thresh)
    fig, ax1 = plt.subplots()
    ax1.plot(pct_above_per_thresh, precision_curve, 'b')
    ax1.set_xlabel('percent of population')
    ax1.set_ylabel('precision', color='b')
    ax2 = ax1.twinx()
    ax2.plot(pct_above_per_thresh, recall_curve, 'r')
    ax2.set_ylabel('recall', color='r')

    name = model_name
    # NOTE(review): ``name`` was otherwise unused and the body of ``if pdf:``
    # was missing. The title call and ``pdf.savefig`` are presumed (``pdf``
    # looks like a matplotlib PdfPages-style object) -- confirm against upstream.
    plt.title(name)
    if pdf:
        pdf.savefig(fig)
def get_threshold(model_id):
    """Print threshold statistics for a trained model and plot its PR curve.

    :param model_id: id looked up in ``common.DEFAULT_TRAINED_MODELS_FILE``.
    :raises ValueError: when the model id is not listed in that file.
    """
    trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
    model_config = trained_models[trained_models["model_id"] == model_id]
    if model_config.empty:
        raise ValueError("Can't find the model %s in %s" %
                         (model_id, common.DEFAULT_TRAINED_MODELS_FILE))
    model_config = model_config.to_dict(orient="list")

    # NOTE(review): ``model_settings`` is not defined in this block;
    # presumably it was derived from ``model_config`` on a lost line -- confirm.
    Y_test = np.load(common.DATASETS_DIR+'/item_factors_test_%s_%s_%s.npy' % (model_settings['fact'],model_settings['dim'],model_settings['dataset']))
    Y_pred = np.load(common.FACTORS_DIR+'/factors_%s.npy' % model_id)

    # Mean and spread of the scores given to true positives.
    good_scores = Y_pred[Y_test==1]
    th = good_scores.mean()
    std = good_scores.std()
    # Single-string print() calls produce the same output on Python 2 and 3;
    # the original print statements were Python 2 only.
    print('Mean th ' + str(th))
    print('Std ' + str(std))

    p, r, thresholds = precision_recall_curve(Y_test.flatten(), Y_pred.flatten())
    # F-score restricted to points where precision exceeds recall; NaN -> 0.
    f = np.nan_to_num((2 * (p*r) / (p+r)) * (p>r))
    print(f)
    max_f = np.argmax(f)
    fth = thresholds[max_f]
    print(' '.join(str(v) for v in (f[max_f], p[max_f], r[max_f])))
    print('F th %.2f' % fth)
    plt.plot(r, p,
             label='Precision-recall curve of class {0}')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.title('Extension of Precision-Recall curve to multi-class')
# Project: GICF    Author: dkloz    | project source | file source
def auprc(self):
        """Return the area under the precision-recall curve of ``self.y_hat`` vs ``self.y``."""
        curve_precision, curve_recall, _ = precision_recall_curve(self.y, self.y_hat)
        return auc(curve_recall, curve_precision)
def descrive(id_list,pn_list):
  """Plot the precision-recall curve of ``pn_list`` scores against ``id_list`` labels."""
  labels = np.array(id_list)
  scores = np.array(pn_list)
  precision, recall, th = precision_recall_curve(labels, scores)
  plt.plot(
    recall,
    precision,
    lw=2,
    color='navy',
    label='Precision-Recall curve',
  )
  plt.ylim([0.0, 1.05])
  plt.xlim([0.0, 1.0])
# Project: CerebralCortex-2.0-legacy    Author: MD2Korg    | project source | file source
def f1Bias_scorer_CV(probs, y, ret_bias=False):
    """Return the best F1 over all thresholds, optionally with that threshold.

    :param probs: predicted scores.
    :param y: true binary labels.
    :param ret_bias: when True, also return the threshold achieving the best F1.
    :return: ``f1`` or ``(f1, bias)``; ``bias`` is ``None`` when no threshold
        produced an F1 above 0.
    """
    precision, recall, thresholds = metrics.precision_recall_curve(y, probs)

    f1 = 0.0
    bias = None  # previously unbound when no threshold improved on 0.0
    for i in range(0, len(thresholds)):
        # Skip the 0/0 case, where F1 is undefined.
        if not (precision[i] == 0 and recall[i] == 0):
            f = 2 * (precision[i] * recall[i]) / (precision[i] + recall[i])
            if f > f1:
                f1 = f
                bias = thresholds[i]

    # The ``else:`` had been lost, so ``return f1`` was unreachable and the
    # function returned None whenever ret_bias was False.
    if ret_bias:
        return f1, bias
    else:
        return f1
def logistic_regression_cv(post_features, post_class, C, cv_n_folds, length_dataset = -1, pr = False, dump = True):
    """Cross-validate a logistic regression and report mean absolute errors.

    :param post_features: feature matrix, row-indexable by index arrays.
    :param post_class: label array.
    :param C: inverse regularisation strength passed to ``LogisticRegression``.
    :param cv_n_folds: number of folds.
    :param length_dataset: number of samples to split; ``-1`` means all.
    :param pr: when True, also return the PR curve of the last fold.
    :param dump: when True, pickle the first fold's model to ``logreg.dat``.
    :return: ``(train_error, test_error)`` or, with ``pr``,
        ``(train_error, test_error, precision, recall, thresholds)``.
    """
    flag = 0  # set once the first fold's model has been dumped
    train_error = []
    test_error = []
    if(length_dataset == -1):
        length_dataset = len(post_class)
    cv = KFold(n = length_dataset, n_folds = cv_n_folds, shuffle = True)
    for train, test in cv:
        # NOTE(review): the original line was truncated to
        # "LogisticRegression(...)[train], post_class[train])"; the
        # ".fit(post_features" part is restored here.
        clf = LogisticRegression(C = C,verbose = 0).fit(post_features[train], post_class[train])
        train_predicted = classify(clf,post_features[train])
        test_predicted = classify(clf,post_features[test])
        train_error.append(np.mean(abs(post_class[train].reshape(len(train),1) - train_predicted)))
        test_error.append(np.mean(abs(post_class[test].reshape(len(test),1) - test_predicted)))
        if(pr == True):
            precision, recall, thresholds = precision_recall_curve(post_class[test], test_predicted)
        if(dump == True and flag == 0):
            # Pickle data is binary: open in "wb" and close the handle
            # deterministically instead of leaking a text-mode file object.
            with open("logreg.dat", "wb") as model_file:
                pickle.dump(clf, model_file)
            flag = 1

    # The ``else:`` had been lost, leaving the second return unreachable.
    if(pr == True):
        return np.mean(train_error),np.mean(test_error), precision, recall, thresholds
    else:
        return np.mean(train_error),np.mean(test_error)
def train_model(clf, cv, X, y, name, plot = False):
    """Fit ``clf`` on every CV split and summarise accuracy and PR-AUC.

    :param clf: estimator with ``fit``, ``score`` and ``predict_proba``.
    :param cv: iterable of ``(train_indices, test_indices)`` pairs.
    :param X: feature matrix, row-indexable by index arrays.
    :param y: label array.
    :param name: label used in the plot title and file name.
    :param plot: when True, save the PR curve of the median-AUC fold.
    :return: ``(mean train accuracy, mean test accuracy, mean PR-AUC)``.
    """
    from sklearn.metrics import auc

    test_accuracy = []
    train_accuracy = []
    pr_auc_scores = []
    precisions, recalls, thresholds = [], [], []
    for train,test in cv:

        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]
        # NOTE(review): the original line was truncated to
        # "y_test = y[test], y_train)"; the fit call is restored here.
        clf.fit(X_train, y_train)

        train_accuracy.append(clf.score(X_train, y_train))
        test_accuracy.append(clf.score(X_test, y_test))

        proba = clf.predict_proba(X_test)
        precision, recall, threshold = precision_recall_curve(y_test, proba[:,1])

        # NOTE(review): these lists were initialised and read below but never
        # filled in the surviving code; the appends are presumed lost.
        pr_auc_scores.append(auc(recall, precision))
        precisions.append(precision)
        recalls.append(recall)
        thresholds.append(threshold)

    if plot:
        scores_to_sort = pr_auc_scores
        # Floor division: a float index raises TypeError on Python 3.
        median = np.argsort(scores_to_sort)[len(scores_to_sort) // 2]

        plt.plot(recalls[median], precisions[median], 'r-',label = "p/r")
        plt.fill_between(recalls[median], 0, precisions[median], facecolor = 'cyan')
        plt.ylim(.5, 1.05)
        # "right center" is not a valid matplotlib location string.
        plt.legend(loc = "center right")
        plt.title('P/R ({}), auc = {}'.format(name, np.mean(pr_auc_scores)))
        plt.savefig('PR - {}.png'.format(name))

    return np.mean(train_accuracy), np.mean(test_accuracy), np.mean(pr_auc_scores)
def evaluate_model(model, features, labels, tile_size, out_path, out_format="GeoTIFF"):
    """Print test-set metrics for the model and write a prediction visualisation."""

    print('_' * 100)
    print("Start evaluating model.")

    X, y_true = get_matrix_form(features, labels, tile_size)

    y_predicted = model.predict(X)

    # The model yields a probability per pixel. Work on a copy and snap each
    # value to 0 or 1 using a plain 0.5 cut-off.
    predicted_bitmap = np.array(y_predicted)
    at_or_above_cutoff = 0.5 <= predicted_bitmap
    predicted_bitmap[at_or_above_cutoff] = 1
    below_cutoff = predicted_bitmap < 0.5
    predicted_bitmap[below_cutoff] = 0

    false_positives = get_false_positives(predicted_bitmap, y_true)
    visualise_predictions(
        predicted_bitmap,
        labels,
        false_positives,
        tile_size,
        out_path,
        out_format=out_format,
    )

    # The metric functions compare their inputs element by element, so the
    # tiles are flattened first: each pixel is then scored on its own rather
    # than requiring a whole tile to be predicted perfectly.
    y_true = y_true.flatten()
    y_predicted = y_predicted.flatten()
    predicted_bitmap = predicted_bitmap.flatten()

    accuracy = metrics.accuracy_score(y_true, predicted_bitmap)
    print("Accuracy on test set: {}".format(accuracy))
    precision = metrics.precision_score(y_true, predicted_bitmap)
    print("Precision on test set: {}".format(precision))
    recall = metrics.recall_score(y_true, predicted_bitmap)
    print("Recall on test set: {}".format(recall))
    precision_recall_curve(y_true, y_predicted, out_path)
# Project: WaterNet    Author: treigerm    | project source | file source
def precision_recall_curve(y_true, y_predicted, out_path):
    """Create a PNG with the precision-recall curve for our predictions.

    Also writes the raw curve values to ``precision_recall.pickle``.

    :param y_true: flattened true labels.
    :param y_predicted: flattened predicted probabilities.
    :param out_path: directory receiving both output files.
    """
    import pickle

    print("Calculate precision recall curve.")
    # The second argument of this call had been lost.
    precision, recall, thresholds = metrics.precision_recall_curve(y_true,
                                                                   y_predicted)

    # Save the raw precision and recall results to a pickle since we might want
    # to analyse them later.
    out_file = os.path.join(out_path, "precision_recall.pickle")
    with open(out_file, "wb") as out:
        # The opening ``pickle.dump({`` had been lost; restored from the
        # surviving dict entries and the closing ``}, out)``.
        pickle.dump({
            "precision": precision,
            "recall": recall,
            "thresholds": thresholds
        }, out)

    # Create the precision-recall curve.
    out_file = os.path.join(out_path, "precision_recall.png")
    plt.plot(recall, precision, label="Precision-Recall curve")
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    # NOTE(review): the PNG path was computed but never used; the save call
    # is presumed from the docstring -- confirm against upstream.
    plt.savefig(out_file)
# Project: menrva    Author: amirziai    | project source | file source
def precision_recall_curve(clf, x_test, y_test):
    """Plot precision-recall curves from each of the two probability columns.

    The sklearn function of the same name is imported locally, so inside this
    body the name refers to sklearn's implementation.
    """
    from sklearn.metrics import precision_recall_curve

    for column in (0, 1):
        column_scores = []
        for row in clf.predict_proba(x_test):
            column_scores.append(row[column])
        precision, recall, thresholds = precision_recall_curve(y_test, column_scores)

        plt.title('Precision Recall Curve')
        plt.plot(recall, precision, 'b')
# Project: single-cell-classification    Author: whuTommy    | project source | file source
def PRC_AUC(Y_hats, Y_test):
    """Return PR curve points, micro-averaged average precision and thresholds.

    The thresholds array is extended by repeating its last value so it has
    the same length as the precision/recall arrays.
    """
    flat_truth = Y_test.flatten()
    flat_scores = Y_hats.flatten()
    p, r, thresholds = precision_recall_curve(flat_truth, flat_scores)
    last_threshold = thresholds[-1]
    thresholds = np.hstack([thresholds, last_threshold])
    # One (recall, precision) row per curve point
    prc = np.vstack([r, p]).T
    auc = average_precision_score(Y_test.flatten(), Y_hats.flatten(), average='micro')
    return prc, auc, thresholds
def f1_curve(Y_hats, Y_test):
    """Return the F1 value at every PR curve point with matching thresholds.

    The thresholds array is extended by repeating its last value so it has
    the same length as the F1 array.
    """
    flat_truth = Y_test.flatten()
    flat_scores = Y_hats.flatten()
    p, r, thresholds = precision_recall_curve(flat_truth, flat_scores)
    last_threshold = thresholds[-1]
    thresholds = np.hstack([thresholds, last_threshold])
    numerator = 2 * p * r
    denominator = p + r
    f1 = numerator / denominator
    return f1, thresholds
def evaluate(binarise_result, y_test, y_score, file_name):
  """
  computes the accuracy, precision and recall. plots the precision and recall curve. saves the plots to the figure folder.
  :param binarise_result: list of binarised result after prediction from classifier
  :type binarise_result: list[list[int]]
  :param y_test: list of binarised labels from the test set
  :type y_test: list[list[int]]
  :param y_score: distance of each sample from the decision boundary for each class
  :type y_score:list
  :param file_name: directory name for saving all figures from the plots
  :type file_name: str
  """
  num_class = y_test.shape[1]

  # Compute Precision-Recall and plot curve
  precision = dict()
  recall = dict()
  average_precision = dict()
  for i in range(num_class):
    precision[i], recall[i], _ = precision_recall_curve(y_test[:, i], y_score[:, i])
    average_precision[i] = average_precision_score(y_test[:, i], y_score[:, i])

  # Compute micro-average precision-recall curve and average precision
  precision["micro"], recall["micro"], _ = precision_recall_curve(y_test.ravel(), y_score.ravel())
  average_precision["micro"] = average_precision_score(y_test, y_score, average="micro")

  # create directory
  create_directory('figure/' + file_name)

  # plots
  plot_precision_recall_curve(average_precision, precision, recall, file_name)
  # Plot Precision-Recall curve for each class
  # NOTE(review): the final argument of this call was lost; ``num_class`` is
  # presumed -- confirm against the helper's signature.
  plot_precision_recall_curve_all_classes(average_precision, precision, recall, file_name,
                                          num_class)

  generate_eval_metrics(binarise_result, file_name, y_test)
# Project: sport-news-retrieval    Author: Andyccs    | project source | file source
def plot_precision_recall_curve(average_precision, precision, recall, file_name, show_plot=False):
  """
  Plots the precision-recall curve stored under key 0 and saves it to
  ``figure/<file_name>/precision_recall_curve.png``.

  :param average_precision: mapping whose entry 0 is shown in the plot title
  :param precision: mapping whose entry 0 holds the precision values (y axis)
  :param recall: mapping whose entry 0 holds the recall values (x axis)
  :param file_name: sub-directory of ``figure/`` the image is written to
  :type file_name: str
  :param show_plot: when true, also display the figure after saving it
  :type show_plot: bool
  """
  plt.plot(recall[0], precision[0], label='Precision-Recall curve')
  plt.ylim([0.0, 1.05])
  plt.xlim([0.0, 1.0])
  plt.title('Precision-Recall example: AUC={0:0.2f}'.format(average_precision[0]))
  plt.legend(loc="lower left")
  plt.savefig('figure/' + file_name + '/precision_recall_curve.png')
  if show_plot:
    # The body of this branch was missing in the source; showing the figure
    # is the only action the flag name allows for.
    plt.show()
项目:ycml    作者:skylander86    | 项目源码 | 文件源码
def find_best_thresholds(Y_true, Y_proba, *, labels=None, target_names=None, precision_thresholds=None):
    """Pick, for every class, the decision threshold with the best F1 score.

    For each class column the precision-recall curve is computed and the
    threshold at the point of maximum F1 is selected. When
    ``precision_thresholds`` is given, only points whose precision is at
    least the class's floor are considered; if no usable point exists, a
    warning is logged and the unconstrained best-F1 threshold is used.

    In the two-class case the second threshold is not searched for; it is
    set to ``1.0 - thresholds[0]``.

    :param Y_true: ground-truth labels (normalised by ``_make_label_indicator``;
        presumably to a 2-D indicator matrix -- confirm against that helper).
    :param Y_proba: predicted scores with the same shape as ``Y_true`` after
        normalisation.
    :param labels: forwarded to ``_filter_labels``.
    :param target_names: forwarded to ``_filter_labels``; the filtered result
        must have one name per class.
    :param precision_thresholds: ``None``, a single float applied to every
        class, or a sequence indexable by class position.
    :return: array of length ``n_classes`` holding one threshold per class.
    """
    Y_true, Y_proba = _make_label_indicator(Y_true, Y_proba)
    Y_true, Y_proba, target_names = _filter_labels(Y_true, Y_proba, labels=labels, target_names=target_names)
    n_classes = Y_true.shape[1]

    # A single float is expanded to one precision floor per class.
    if isinstance(precision_thresholds, float):
        precision_thresholds = np.full(n_classes, precision_thresholds)
    #end if

    assert Y_true.shape[0] == Y_proba.shape[0]
    assert Y_true.shape[1] == Y_proba.shape[1]
    assert len(target_names) == n_classes

    thresholds = np.zeros(n_classes)
    for i in range(n_classes):
        if n_classes == 2 and i == 1:
            thresholds[i] = 1.0 - thresholds[0]
            # Skip the search below; without this the complement just
            # assigned would be overwritten at the end of the loop body.
            continue
        #end if

        p, r, t = precision_recall_curve(Y_true[:, i], Y_proba[:, i])
        # The small epsilon keeps the denominator non-zero when p == r == 0.
        f1 = np.nan_to_num((2 * p * r) / (p + r + 1e-8))

        if precision_thresholds is None:  # use optimal threshold
            best_f1_i = np.argmax(f1)

        else:  # use optimal threshold for precision > precision_threshold
            try:
                candidates = [k for k in range(p.shape[0]) if p[k] >= precision_thresholds[i]]
                # max() raises ValueError when no point meets the floor.
                best_f1_i = max(candidates, key=lambda k: f1[k])
                # The final curve point has no entry in t (t is one shorter
                # than p), and a zero F1 is useless; treat both as "not found".
                if best_f1_i == p.shape[0] - 1 or f1[best_f1_i] == 0.0: raise ValueError()

            except ValueError:
                best_f1_i = np.argmax(f1)
                logger.warning('Unable to find threshold for label "{}" where precision >= {}. Defaulting to best threshold of {}.'.format(target_names[i], precision_thresholds[i], t[best_f1_i]))
            #end try

        #end if

        thresholds[i] = t[best_f1_i]
    #end for

    return thresholds
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_precision_recall_curve_pos_label():
    """Check that ``pos_label`` selects the positive class on multiclass targets.

    Calling ``precision_recall_curve`` on the raw labels with ``pos_label=2``
    must give the same curve as calling it on the pre-binarised target
    ``y_true == 2``.
    """
    y_true, _, probas_pred = make_prediction(binary=False)
    pos_label = 2
    p, r, thresholds = precision_recall_curve(y_true,
                                              probas_pred[:, pos_label],
                                              pos_label=pos_label)
    p2, r2, thresholds2 = precision_recall_curve(y_true == pos_label,
                                                 probas_pred[:, pos_label])
    assert_array_almost_equal(p, p2)
    assert_array_almost_equal(r, r2)
    assert_array_almost_equal(thresholds, thresholds2)
    # precision and recall have one more point than there are thresholds
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_precision_recall_curve_errors():
    """``precision_recall_curve`` must raise ``ValueError`` for this input."""
    # Target contains non-binary labels
    non_binary_labels = [0, 1, 2]
    scores = [[0.0], [1.0], [1.0]]
    assert_raises(ValueError, precision_recall_curve, non_binary_labels, scores)
def get_precision_recall_curve(y_gold_standard,y_predicted):
    """
    Computes the precision-recall curve.

    Thin wrapper that forwards both arguments unchanged to
    ``precision_recall_curve`` and returns its result.

    Keyword arguments:
    y_gold_standard -- Expected labels.
    y_predicted -- Predicted labels

    Returns:
    The ``(precision, recall, thresholds)`` result of ``precision_recall_curve``.
    """
    return precision_recall_curve(y_gold_standard, y_predicted)
def prCurve(y_true, y_scores, recallMultiplier):
    """Return the precision-recall curve with recall scaled by ``recallMultiplier``.

    The thresholds produced by ``precision_recall_curve`` are discarded.

    :param y_true: ground-truth labels passed to ``precision_recall_curve``.
    :param y_scores: predicted scores passed to ``precision_recall_curve``.
    :param recallMultiplier: factor every recall value is multiplied by.
    :return: ``(precision, scaled_recall)``.
    """
    # The recall multiplier accounts for the percentage of examples that are
    # unreached (the original note is cut off here -- NOTE(review): confirm
    # what the examples are unreached by).
    curve_precision, curve_recall, _ = precision_recall_curve(y_true, y_scores)
    return curve_precision, curve_recall * recallMultiplier

项目:prep    作者:ysyushi    | 项目源码 | 文件源码
def calc_metric(truth, score):
    """Print the ROC AUC and the area under the precision-recall curve.

    :param truth: ground-truth labels.
    :param score: predicted scores for the same samples.
    :return: None; both metrics are written to standard output.
    """
    # Single-argument print(...) with %-formatting emits the same text on
    # Python 2 and Python 3; the original print statements only parse on 2.
    print("ROCAUC: %s" % (roc_auc_score(truth, score),))
    precision, recall, _ = precision_recall_curve(truth, score)
    # Two spaces after the colon reproduce the original output exactly.
    print("AUPRC:  %s" % (auc(recall, precision),))
def plot_roc(y_score, y_test, target_map, title='ROC curve'): 
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_curve, auc, precision_recall_curve
    from sklearn.preprocessing import label_binarize

    # Compute Precision-Recall and plot curve
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    target_ids = target_map.keys()
    target_names = target_map.values()
    print target_names

    y_test_multi = label_binarize(y_test, classes=target_ids)
    N, n_classes = y_score.shape[:2]
    for i,name in enumerate(target_names):
        fpr[name], tpr[name], _ = roc_curve(y_test_multi[:, i], y_score[:, i])
        roc_auc[name] = auc(fpr[name], tpr[name]) 

    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_test_multi.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) 

    # Plot Precision-Recall curve for each class
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr["micro"], tpr["micro"],
             label='ROC curve (area = {0:0.2f})'
                   ''.format(roc_auc["micro"]), linewidth=3)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.ylim([0.0, 1.0])
    plt.xlim([0.0, 1.0])
    plt.legend(loc="lower right")

    for i,name in enumerate(target_names):
        plt.plot(fpr[name], tpr[name],
                 label='{0}'.format(name.title().replace('_', ' ')))
                 # label='{0} (area = {1:0.2f})'
                 #       ''.format(name.title().replace('_', ' '), roc_auc[name]))

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")