Python sklearn.metrics 模块,recall_score() 实例源码


项目:dac-training    作者:jlonij    | 项目源码 | 文件源码
def validate(data, labels):
    """Run ten-fold cross-validation with stratified sampling.

    For each of ten stratified shuffle splits, fits the classifier ``clf``
    (a name taken from the enclosing scope) on the training part, predicts
    the held-out part, and finally prints the mean accuracy, precision,
    recall and F1-measure over all splits.

    NOTE(review): the ``clf.fit`` call was reconstructed from a truncated
    source line; confirm against the upstream project.
    """
    accuracy_scores = []
    precision_scores = []
    recall_scores = []
    f1_scores = []

    sss = StratifiedShuffleSplit(n_splits=10)
    for train_index, test_index in sss.split(data, labels):
        x_train, x_test = data[train_index], data[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        accuracy_scores.append(accuracy_score(y_test, y_pred))
        precision_scores.append(precision_score(y_test, y_pred))
        recall_scores.append(recall_score(y_test, y_pred))
        f1_scores.append(f1_score(y_test, y_pred))

    print('Accuracy', np.mean(accuracy_scores))
    print('Precision', np.mean(precision_scores))
    print('Recall', np.mean(recall_scores))
    print('F1-measure', np.mean(f1_scores))
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def classification_metrics(y, y_pred, threshold):
    """Collect threshold-based classification statistics into a dict.

    'threshold', 'np.std(y_pred)' and 'positive_frac_batch' are always set.
    'fpr' is set only when ``y`` has at least one entry equal to False,
    'auc' only when ``y`` holds both truthy and falsy entries, and
    'precision' / 'recall' only when the thresholded predictions are
    mixed as well.
    """
    result = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    negatives = np.count_nonzero(y == False)
    false_alarms = np.count_nonzero(
        np.logical_and(y == False, y_pred >= threshold))
    if negatives > 0:
        result['fpr'] = float(false_alarms) / float(negatives)

    # AUC, precision and recall need both classes present in y.
    labels_mixed = any(y) and not all(y)
    if not labels_mixed:
        return result

    result['auc'] = roc_auc_score(y, y_pred)
    predicted_positive = y_pred >= threshold
    if any(predicted_positive) and not all(predicted_positive):
        result['precision'] = precision_score(
            np.array(y, dtype=np.float32), predicted_positive)
        result['recall'] = recall_score(y, predicted_positive)
    return result
项目:texta    作者:texta-tk    | 项目源码 | 文件源码
def train_model_with_cv(model, params, X, y):
    """Grid-search ``model`` on a training split and score the best estimator.

    The data is split 80/20. A 5-fold ``GridSearchCV`` over ``params`` is
    fitted on the training part, and the best estimator is evaluated on the
    held-out part.

    Returns:
        (model, statistics) where ``model`` is the best estimator and
        ``statistics`` is a dict with keys 'f1_score' (micro-averaged),
        'confusion_matrix', 'precision' and 'recall'.

    NOTE(review): the ``gs_clf.fit`` call was reconstructed from a truncated
    source line; confirm against the upstream project.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

    # Use the training data for parameter selection in a grid search.
    gs_clf = GridSearchCV(model, params, n_jobs=1, cv=5)
    gs_clf = gs_clf.fit(X_train, y_train)
    model = gs_clf.best_estimator_

    # Use the best model and the test data for the final evaluation.
    y_pred = model.predict(X_test)

    _statistics = {
        'f1_score': f1_score(y_test, y_pred, average='micro'),
        'confusion_matrix': confusion_matrix(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
    }

    return model, _statistics
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute loss, threshold and threshold-based classification statistics.

    Returns a dict with 'loss', 'threshold' and 'np.std(y_pred)'. 'fpr' is
    added only when ``y`` contains at least one entry equal to False, so a
    batch without negatives no longer raises ZeroDivisionError. 'auc' is
    added when ``y`` has both truthy and falsy entries, and 'precision' /
    'recall' when the thresholded predictions are mixed as well.
    """
    stats = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to prob of catastrophe

    stats['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    stats['threshold'] = threshold
    stats['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        stats['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if any(y_pred_bool) and not all(y_pred_bool):
            stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            stats['recall'] = recall_score(y, y_pred_bool)

    return stats
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def classification_metrics(y, y_pred, threshold):
    """Collect threshold-based classification statistics into a dict.

    'threshold', 'np.std(y_pred)' and 'positive_frac_batch' are always set.
    'fpr' is set only when ``y`` has at least one entry equal to False,
    'auc' only when ``y`` holds both truthy and falsy entries, and
    'precision' / 'recall' only when the thresholded predictions are
    mixed as well.
    """
    result = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    negatives = np.count_nonzero(y == False)
    false_alarms = np.count_nonzero(
        np.logical_and(y == False, y_pred >= threshold))
    if negatives > 0:
        result['fpr'] = float(false_alarms) / float(negatives)

    # AUC, precision and recall need both classes present in y.
    labels_mixed = any(y) and not all(y)
    if not labels_mixed:
        return result

    result['auc'] = roc_auc_score(y, y_pred)
    predicted_positive = y_pred >= threshold
    if any(predicted_positive) and not all(predicted_positive):
        result['precision'] = precision_score(
            np.array(y, dtype=np.float32), predicted_positive)
        result['recall'] = recall_score(y, predicted_positive)
    return result
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute loss, threshold and threshold-based classification statistics.

    Returns a dict with 'loss', 'threshold' and 'np.std(y_pred)'. 'fpr' is
    added only when ``y`` contains at least one entry equal to False, so a
    batch without negatives no longer raises ZeroDivisionError. 'auc' is
    added when ``y`` has both truthy and falsy entries, and 'precision' /
    'recall' when the thresholded predictions are mixed as well.
    """
    stats = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to prob of catastrophe

    stats['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    stats['threshold'] = threshold
    stats['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        stats['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if any(y_pred_bool) and not all(y_pred_bool):
            stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            stats['recall'] = recall_score(y, y_pred_bool)

    return stats
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute loss, threshold and threshold-based classification statistics.

    Returns a dict with 'loss', 'threshold' and 'np.std(y_pred)'. 'fpr' is
    added only when ``y`` contains at least one entry equal to False, so a
    batch without negatives no longer raises ZeroDivisionError. 'auc' is
    added when ``y`` has both truthy and falsy entries, and 'precision' /
    'recall' when the thresholded predictions are mixed as well.
    """
    stats = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to prob of catastrophe

    stats['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    stats['threshold'] = threshold
    stats['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        stats['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if any(y_pred_bool) and not all(y_pred_bool):
            stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            stats['recall'] = recall_score(y, y_pred_bool)

    return stats
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute loss, threshold and threshold-based classification statistics.

    Returns a dict with 'loss', 'threshold' and 'np.std(y_pred)'. 'fpr' is
    added only when ``y`` contains at least one entry equal to False, so a
    batch without negatives no longer raises ZeroDivisionError. 'auc' is
    added when ``y`` has both truthy and falsy entries, and 'precision' /
    'recall' when the thresholded predictions are mixed as well.
    """
    stats = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to prob of catastrophe

    stats['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    stats['threshold'] = threshold
    stats['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        stats['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if any(y_pred_bool) and not all(y_pred_bool):
            stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            stats['recall'] = recall_score(y, y_pred_bool)

    return stats
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def classification_metrics(y, y_pred, threshold):
    """Collect threshold-based classification statistics into a dict.

    'threshold', 'np.std(y_pred)' and 'positive_frac_batch' are always set.
    'fpr' is set only when ``y`` has at least one entry equal to False,
    'auc' only when ``y`` holds both truthy and falsy entries, and
    'precision' / 'recall' only when the thresholded predictions are
    mixed as well.
    """
    result = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    negatives = np.count_nonzero(y == False)
    false_alarms = np.count_nonzero(
        np.logical_and(y == False, y_pred >= threshold))
    if negatives > 0:
        result['fpr'] = float(false_alarms) / float(negatives)

    # AUC, precision and recall need both classes present in y.
    labels_mixed = any(y) and not all(y)
    if not labels_mixed:
        return result

    result['auc'] = roc_auc_score(y, y_pred)
    predicted_positive = y_pred >= threshold
    if any(predicted_positive) and not all(predicted_positive):
        result['precision'] = precision_score(
            np.array(y, dtype=np.float32), predicted_positive)
        result['recall'] = recall_score(y, predicted_positive)
    return result
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute loss, threshold and threshold-based classification statistics.

    Returns a dict with 'loss', 'threshold' and 'np.std(y_pred)'. 'fpr' is
    added only when ``y`` contains at least one entry equal to False, so a
    batch without negatives no longer raises ZeroDivisionError. 'auc' is
    added when ``y`` has both truthy and falsy entries, and 'precision' /
    'recall' when the thresholded predictions are mixed as well.
    """
    stats = {}
    y_pred_pair, loss = self.predict_proba_with_loss(X, y)
    y_pred = y_pred_pair[:, 1]  # from softmax pair to prob of catastrophe

    stats['loss'] = loss
    threshold = self.threshold_from_data(X, y)
    stats['threshold'] = threshold
    stats['np.std(y_pred)'] = np.std(y_pred)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    if any(y) and not all(y):
        stats['auc'] = roc_auc_score(y, y_pred)
        y_pred_bool = y_pred >= threshold
        if any(y_pred_bool) and not all(y_pred_bool):
            stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
            stats['recall'] = recall_score(y, y_pred_bool)

    return stats
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def classification_metrics(y, y_pred, threshold):
    """Collect threshold-based classification statistics into a dict.

    'threshold', 'np.std(y_pred)' and 'positive_frac_batch' are always set.
    'fpr' is set only when ``y`` has at least one entry equal to False,
    'auc' only when ``y`` holds both truthy and falsy entries, and
    'precision' / 'recall' only when the thresholded predictions are
    mixed as well.
    """
    result = {
        'threshold': threshold_from_predictions(y, y_pred, 0),
        'np.std(y_pred)': np.std(y_pred),
        'positive_frac_batch': float(np.count_nonzero(y == True)) / len(y),
    }

    negatives = np.count_nonzero(y == False)
    false_alarms = np.count_nonzero(
        np.logical_and(y == False, y_pred >= threshold))
    if negatives > 0:
        result['fpr'] = float(false_alarms) / float(negatives)

    # AUC, precision and recall need both classes present in y.
    labels_mixed = any(y) and not all(y)
    if not labels_mixed:
        return result

    result['auc'] = roc_auc_score(y, y_pred)
    predicted_positive = y_pred >= threshold
    if any(predicted_positive) and not all(predicted_positive):
        result['precision'] = precision_score(
            np.array(y, dtype=np.float32), predicted_positive)
        result['recall'] = recall_score(y, predicted_positive)
    return result
项目:Stock-SentimentAnalysis    作者:JoshuaMichaelKing    | 项目源码 | 文件源码
def classifier_score(tp, classifier, train_list, test, test_tag):
    """Train, persist and score a classifier on a tagged test set.

    The classifier is wrapped in ``SklearnClassifier``, trained on
    ``train_list``, pickled to ``./Reviews/<tp>.pkl`` and used to classify
    ``test``. Precision and recall are computed for the 'pos' tag.

    Output: interval, pos_precision, pos_recall, accuracy_score
    (``interval`` is the elapsed wall-clock time divided by 100 microseconds).

    NOTE(review): the timestamp calls and ``classifier.train(train_list)``
    were reconstructed from truncated source lines; confirm upstream.
    """
    import datetime  # local import: the file's import block is not visible here

    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # predicted tags, one per test item
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    elapsed = datetime.datetime.now() - starttime
    # timedelta.microseconds is only the sub-second component, so fold in
    # the whole days and seconds as well.
    total_us = (elapsed.days * 86400 + elapsed.seconds) * 10 ** 6 + elapsed.microseconds
    interval = total_us / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

项目:watlink    作者:dustalov    | 项目源码 | 文件源码
def evaluate(path):
    """Score the resource stored under ``path`` against the gold standard.

    A pair is treated as truly positive when its second element is None or
    ``gold[pair]`` is truthy, and as predicted positive when its second
    element is not None.

    Returns a dict with the confusion-matrix counts ('tn', 'fp', 'fn', 'tp'),
    'precision', 'recall', 'f1' and 'scores' (the result of
    ``scores(resources[path])``).
    """
    true = [int(pair[1] is None or gold[pair]) for pair in resources[path]]
    pred = [int(pair[1] is not None)           for pair in resources[path]]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn':        tn,
        'fp':        fp,
        'fn':        fn,
        'tp':        tp,
        'precision': precision_score(true, pred),
        'recall':    recall_score(true, pred),
        'f1':        f1_score(true, pred),
        'scores':    scores(resources[path])
    }
项目:watlink    作者:dustalov    | 项目源码 | 文件源码
def evaluate(path):
    """Score the graph stored under ``path`` on the pairs in ``union``.

    A pair is predicted positive when ``has_sense_path(G, *pair)`` is truthy.
    The reference labels come from ``true``, which, like ``union`` and
    ``resources``, is taken from the enclosing scope.

    Returns a dict with the confusion-matrix counts ('tn', 'fp', 'fn', 'tp'),
    'precision', 'recall', 'f1' and 'scores' (the result of ``scores(G)``).
    """
    G = resources[path]

    pred = [int(has_sense_path(G, *pair)) for pair in union]

    tn, fp, fn, tp = confusion_matrix(true, pred).ravel()

    return {
        'tn':        tn,
        'fp':        fp,
        'fn':        fn,
        'tp':        tp,
        'precision': precision_score(true, pred),
        'recall':    recall_score(true, pred),
        'f1':        f1_score(true, pred),
        'scores':    scores(G)
    }
项目:PEP    作者:ma-compbio    | 项目源码 | 文件源码
def analyzeResult_temp(data,model,DataVecs):
    """Print accuracy statistics for ``model.predict`` on ``DataVecs``.

    Stores the predictions in ``data['predict']``, prints overall, positive
    (label == 1) and negative (label == 0) accuracy, then tries to print
    ROC AUC / AUPR from ``model.predict_proba`` plus precision, recall, F1
    and MCC. If that part fails, "ROC unavailable" is printed instead.

    NOTE(review): the try/except and the trailing format arguments were
    reconstructed from truncated source lines, guided by the labels in the
    format strings; confirm against the upstream project.
    """
    # Local import for the names reconstructed above.
    from sklearn.metrics import average_precision_score, matthews_corrcoef

    predict = model.predict(DataVecs)
    data['predict'] = predict
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        # Best effort: models without predict_proba or single-class data
        # cannot produce these scores.
        print("ROC unavailable")

# Performance evaluation and result analysis using adjusted thresholds
项目:PEP    作者:ma-compbio    | 项目源码 | 文件源码
def analyzeResult(data,model,DataVecs,threshold):
    """Print accuracy statistics using a custom probability threshold.

    A sample is predicted positive when the second column of
    ``model.predict_proba(DataVecs)`` is greater than ``threshold``. The
    predictions are stored in ``data['predict']``. Overall, positive
    (label == 1) and negative (label == 0) accuracy are printed, followed by
    ROC AUC / AUPR and precision, recall, F1 and MCC. If that last part
    fails, "ROC unavailable" is printed instead.

    NOTE(review): the try/except and the trailing format arguments were
    reconstructed from truncated source lines, guided by the labels in the
    format strings; confirm against the upstream project.
    """
    # Local import for the names reconstructed above.
    from sklearn.metrics import average_precision_score, matthews_corrcoef

    predict = model.predict_proba(DataVecs)[:, 1]
    data['predict'] = (predict > threshold)
    print("Accuracy: %f %%" % (100. * sum(data["label"] == data["predict"]) / len(data["label"])))
    answer1 = data[data["label"] == 1]
    answer2 = data[data["label"] == 0]
    print("Positive Accuracy: %f %%" % (100. * sum(answer1["label"] == answer1["predict"]) / len(answer1["label"])))
    print("Negative Accuracy: %f %%" % (100. * sum(answer2["label"] == answer2["predict"]) / len(answer2["label"])))
    try:
        result_auc = model.predict_proba(DataVecs)
        print("Roc:%f\nAUPR:%f\n" % (roc_auc_score(data["label"], result_auc[:, 1]),
                                     average_precision_score(data["label"], result_auc[:, 1])))
        print("Precision:%f\nRecall:%f\nF1score:%f\nMCC:%f\n" % (precision_score(data["label"], data["predict"]),
                                                                 recall_score(data["label"], data["predict"]),
                                                                 f1_score(data["label"], data["predict"]),
                                                                 matthews_corrcoef(data["label"], data["predict"])))
    except Exception:
        # Best effort: single-class data cannot produce these scores.
        print("ROC unavailable")

# Performance evaluation
项目:auDeep    作者:auDeep    | 项目源码 | 文件源码
def uar_score(labels: np.ndarray,
              predictions: np.ndarray):
    """
    Computes the unweighted average recall for the specified true labels and predictions.

    The unweighted average recall is simply the average recall for each class without any weighting.

    Parameters
    ----------
    labels: numpy.ndarray
        A one-dimensional numpy array containing the true labels of instances
    predictions: numpy.ndarray
        A one-dimensional numpy array containing the predicted labels of instances

    Returns
    -------
    float
        The unweighted average recall for the specified true labels and predictions
    """
    return recall_score(labels, predictions, average="macro")
项目:HappyCat    作者:sparktsao    | 项目源码 | 文件源码
def MyEvaluation(y_test,predicted):
    """Print a classification report and return precision, recall and counts.

    Entries of ``predicted`` whose type name contains "int" are kept as they
    are; any other entry is replaced by a one-hot list marking its arg-max.
    Precision and recall use "macro" averaging when ``B_MULTICLASS`` is
    truthy and "binary" otherwise.

    Returns:
        (v_precision, v_recall, TP, FP, TN, FN), where the four counts come
        from ``perf_measure`` for the ``KLABEL["malicious"]`` class.
    """
    def norm_me(x):
        # Integer-typed entries are already class labels.
        if str(type(x)).find("int")>-1:
            return x
        # Otherwise one-hot encode the position of the largest score.
        zix = np.argmax(x)
        x1 = [0]*len(x)
        x1[zix] = 1
        return x1

    predicted = np.array([norm_me(x) for x in predicted], dtype="uint8")

    # Report names follow the label ids 0..WORKING_KLABEL-1.
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test,predicted,target_names=target_names)
    print(result)

    # The original assigned to a misspelt name here, so "macro" was never used.
    averagelabel = "macro" if B_MULTICLASS else 'binary'

    v_precision = precision_score(y_test,predicted, average=averagelabel)
    v_recall = recall_score(y_test,predicted, average=averagelabel)

    (TP, FP, TN, FN) = perf_measure(y_test, predicted,KLABEL["malicious"])
    return v_precision,v_recall,TP, FP, TN, FN
项目:text-analytics-with-python    作者:dipanjanS    | 项目源码 | 文件源码
def display_evaluation_metrics(true_labels, predicted_labels, positive_class=1):
    """Print 'Accuracy:', 'Precision:', 'Recall:' and 'F1 Score:' lines.

    Uses Python 2 print statements.

    NOTE(review): every ``np.round(`` call below is cut off in this copy of
    the source (the metric expression and closing parenthesis are missing),
    so the block does not parse as written. The arguments cannot be
    recovered from what is visible here; restore them from the upstream
    project. ``positive_class`` is presumably the positive label passed to
    the metric functions -- TODO confirm.
    """

    print 'Accuracy:', np.round(
    print 'Precision:', np.round(
    print 'Recall:', np.round(
    print 'F1 Score:', np.round(
项目:text-analytics-with-python    作者:dipanjanS    | 项目源码 | 文件源码
def get_metrics(true_labels, predicted_labels):
    """Print 'Accuracy:', 'Precision:', 'Recall:' and 'F1 Score:' lines.

    Uses Python 2 print statements.

    NOTE(review): every ``np.round(`` call below is cut off in this copy of
    the source (the metric expression and closing parenthesis are missing),
    so the block does not parse as written. The arguments cannot be
    recovered from what is visible here; restore them from the upstream
    project.
    """

    print 'Accuracy:', np.round(
    print 'Precision:', np.round(
    print 'Recall:', np.round(
    print 'F1 Score:', np.round(
项目:drugADR    作者:cosylabiiit    | 项目源码 | 文件源码
def get_scores(clf, X_t_train, y_train, X_t_test, y_test):
    """Fit ``clf`` and collect evaluation scores on the test set.

    Returns a dict with:
        'F2 Score'  - sample-averaged F-beta score with beta=2
        'ROC_AUC'   - ROC AUC computed from ``clf.predict_proba``
        'P_AUPR'    - average precision computed from the hard predictions
        'Precision' - micro-averaged precision
        'Recall'    - micro-averaged recall

    NOTE(review): the ``clf.fit`` call was reconstructed from a truncated
    source line; confirm against the upstream project.
    """
    clf.fit(X_t_train, y_train)
    # Predict once and reuse; the original recomputed this for every metric.
    predicted = clf.predict(X_t_test)

    app = dict()
    app['F2 Score'] = fbeta_score(y_test, predicted, beta=2, average='samples')
    app['ROC_AUC'] = roc_auc_score(y_test, clf.predict_proba(X_t_test))
    app['P_AUPR'] = average_precision_score(y_test, predicted)
    app['Precision'] = precision_score(y_test, predicted, average='micro')
    app['Recall'] = recall_score(y_test, predicted, average='micro')
    return app
项目:100knock2017    作者:tmu-nlp    | 项目源码 | 文件源码
def cv(feature_dict, feature, polarity, folds):
    """Accumulate accuracy, precision, recall and F1 over K-fold splits.

    A fresh ``LogisticRegression`` is created for every fold. The returned
    values are the per-fold sums (they are not divided by the fold count).

    NOTE(review): this copy of the source is damaged -- see the notes below.
    The block does not parse as written.
    """
    kfold = KFold(len(polarity), n_folds = folds)
    count, f1, recall, precision, accuracy = 0, 0, 0, 0, 0
    for train, test in kfold:
        LR = LogisticRegression()
        count += 1
        x = [(feature[i]) for i in train]
        # NOTE(review): the tail ", (y))" looks like the remains of a dropped
        # model-fitting call that followed this assignment; restore it from
        # the upstream project.
        y = [(polarity[i])for i in train], (y))

        test_label = []
        answer_label = [(polarity[j]) for j in test]
        for j in test:
            query = feature[j]
            # -1 is used as the result when the query's column count differs
            # from len(feature_dict).
            result = -1 if query.shape[1] != len(feature_dict) else predict(LR, query)
        # NOTE(review): test_label is never appended to in the visible code;
        # presumably an append of `result` was lost -- TODO confirm.
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))
    return accuracy, precision, recall, f1
项目:TextClassification    作者:mosu027    | 项目源码 | 文件源码
def printResult(y_true, y_pred):
    """Print classification scores and return a text summary.

    Prints accuracy, precision, recall, F1 and the confusion matrix.

    Returns:
        A string with the precision, recall, F1 and confusion-matrix lines
        (accuracy is printed but is not part of the returned string).
    """
    acc = accuracy_score(y_true, y_pred)
    print("Accuracy: {:.4%}".format(acc))

    # Local names avoid shadowing the metric functions themselves.
    precision = metrics.precision_score(y_true, y_pred)
    recall = metrics.recall_score(y_true, y_pred)
    f1 = metrics.f1_score(y_true, y_pred)
    conf_matrix = metrics.confusion_matrix(y_true, y_pred)

    # Single-argument print calls behave the same on Python 2 and 3.
    print("Precision: %s" % (precision,))
    print("Recall: %s" % (recall,))
    print("f1_score: %s" % (f1,))
    print("confusion_matrix:")
    print(conf_matrix)

    resultStr = "".join([
        "Precision: ", str(precision), "\n",
        "Recall: ", str(recall), "\n",
        "f1_score: ", str(f1), "\n",
        "confusion_matrix", "\n",
        str(conf_matrix), "\n",
    ])
    return resultStr
项目:b4msa    作者:INGEOTEC    | 项目源码 | 文件源码
def compute_score(self, conf, hy):
    """Fill ``conf`` with per-class and aggregate scores for predictions ``hy``.

    Per-class F1, recall and precision are stored under '_all_f1',
    '_all_recall' and '_all_precision', keyed by the label name obtained from
    ``self.le.inverse_transform``. '_macrorecall', '_macrof1', '_microf1',
    '_weightedf1' and '_accuracy' are stored as well. Finally '_score' is set
    to the entry selected by ``self.score``.

    With exactly two classes, '_macrof1' is the mean of the per-class F1
    values and '_microf1' / '_weightedf1' are the binary F1. Otherwise the
    macro, micro and weighted F1 averages are used. The original code had
    lost the ``else`` here, so the binary values were always overwritten.

    ``conf`` is modified in place; nothing is returned.
    """
    def by_label(values):
        # Map each class index back to its original label name.
        return {str(self.le.inverse_transform([klass])[0]): value
                for klass, value in enumerate(values)}

    RS = recall_score(self.y, hy, average=None)
    conf['_all_f1'] = M = by_label(f1_score(self.y, hy, average=None))
    conf['_all_recall'] = by_label(RS)
    conf['_all_precision'] = N = by_label(precision_score(self.y, hy, average=None))
    conf['_macrorecall'] = np.mean(RS)
    if len(self.le.classes_) == 2:
        conf['_macrof1'] = np.mean(np.array([v for v in conf['_all_f1'].values()]))
        conf['_weightedf1'] = conf['_microf1'] = f1_score(self.y, hy, average='binary')
    else:
        conf['_macrof1'] = f1_score(self.y, hy, average='macro')
        conf['_microf1'] = f1_score(self.y, hy, average='micro')
        conf['_weightedf1'] = f1_score(self.y, hy, average='weighted')
    conf['_accuracy'] = accuracy_score(self.y, hy)
    if self.score.startswith('avgf1:'):
        # 'avgf1:<k1>:<k2>' averages the F1 of the two named classes.
        _, k1, k2 = self.score.split(':')
        conf['_' + self.score] = (M[k1] + M[k2]) / 2
    elif self.score.startswith('avgf1f0:'):
        # 'avgf1f0:<k1>:<k2>' averages F1 and precision per named class first.
        _, k1, k2 = self.score.split(':')
        pos = (M[k1] + N[k1]) / 2.
        neg = (M[k2] + N[k2]) / 2.
        conf['_' + self.score] = (pos + neg) / 2.
    conf['_score'] = conf['_' + self.score]
项目:TensorFlow_DCIGN    作者:yselivonchyk    | 项目源码 | 文件源码
def evaluate_precision_recall(y, target, labels):
  """Compute overall accuracy and one-vs-rest scores for every class.

  ``target`` is truncated to ``len(y)``. For each class id from 0 to
  ``max(target)``, a single-class view of target and prediction is extracted
  with ``_extract_single_class`` and scored. Each per-class dict is also
  printed as it is produced.

  Returns:
    (accuracy, results) where ``results`` is a list of dicts with keys
    'precision', 'recall', 'f1', 'fraction', '#of_class', 'label' and
    'label_id'.

  NOTE(review): the ``results.append({ ... })`` wrapper was reconstructed
  from truncated source lines; confirm against the upstream project.
  """
  import sklearn.metrics as metrics
  target = target[:len(y)]
  num_classes = max(target) + 1
  results = []
  for i in range(num_classes):
    class_target = _extract_single_class(i, target)
    class_y = _extract_single_class(i, y)

    results.append({
      'precision': metrics.precision_score(class_target, class_y),
      'recall': metrics.recall_score(class_target, class_y),
      'f1': metrics.f1_score(class_target, class_y),
      'fraction': sum(class_target)/len(target),
      '#of_class': int(sum(class_target)),
      'label': labels[i],
      'label_id': i
      # 'tp': tp
    })
    print('%d/%d' % (i, num_classes), results[-1])
  accuracy = metrics.accuracy_score(target, y)
  return accuracy, results
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_ovr_multilabel_dataset():
    # Fits a one-vs-rest MultinomialNB on the first 80 samples of a generated
    # multilabel dataset and checks micro-averaged precision and recall on
    # the remaining 20.
    #
    # NOTE(review): this copy of the source is damaged and does not parse as
    # written. The make_multilabel_classification(...) call is missing every
    # argument after n_samples, and both assertions are missing their
    # expected value and closing parenthesis. Presumably `au` was meant to
    # control the generator and `prec` / `recall` are the expected scores --
    # TODO restore from the upstream project.
    base_clf = MultinomialNB(alpha=1)
    for au, prec, recall in zip((True, False), (0.51, 0.66), (0.51, 0.80)):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert_almost_equal(precision_score(Y_test, Y_pred, average="micro"),
        assert_almost_equal(recall_score(Y_test, Y_pred, average="micro"),
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_precision_recall_f_ignored_labels():
    # Test a subset of labels may be requested for PRF
    #
    # NOTE(review): the weighted-average assertion and the body of the final
    # loop were reconstructed from truncated source lines; confirm against
    # the upstream project.
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    data = [(y_true, y_pred),
            (y_true_bin, y_pred_bin)]

    for y_true, y_pred in data:
        recall_13 = partial(recall_score, y_true, y_pred, labels=[1, 3])
        recall_all = partial(recall_score, y_true, y_pred, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        # Supports are 2 (label 1) and 1 (label 3).
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # ensure the above were meaningful tests:
        for average in ['macro', 'weighted', 'micro']:
            assert recall_13(average=average) != recall_all(average=average)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_zero_precision_recall():
    # Check that pathological cases do not bring NaNs
    #
    # NOTE(review): the try/finally that restores the numpy error settings
    # was reconstructed; the original lines were dropped from this copy.

    old_error_settings = np.seterr(all='raise')

    try:
        y_true = np.array([0, 1, 2, 0, 1, 2])
        y_pred = np.array([2, 0, 1, 1, 2, 0])

        assert_almost_equal(precision_score(y_true, y_pred,
                                            average='weighted'), 0.0, 2)
        assert_almost_equal(recall_score(y_true, y_pred, average='weighted'),
                            0.0, 2)
        assert_almost_equal(f1_score(y_true, y_pred, average='weighted'),
                            0.0, 2)
    finally:
        # Always restore the global numpy error state for later tests.
        np.seterr(**old_error_settings)

项目:Sacred_Deep_Learning    作者:AAbercrombie0492    | 项目源码 | 文件源码
def on_epoch_end(self, epoch, logs=None):
    """Add validation recall, precision, AUC and F1 to ``logs``.

    Predictions come from ``self.model.predict(self.X_val)`` reduced with
    arg-max over axis 1, and are scored against ``self.y_val``. Each metric
    is computed with ``average=None`` and then averaged with ``.mean()``.

    ``logs`` defaults to None instead of a shared mutable ``{}`` so that
    values cannot leak between calls that omit the argument.
    """
    import numpy as np
    from sklearn.metrics import recall_score, precision_score, roc_auc_score, f1_score

    if logs is None:
        logs = {}

    y_pred = self.model.predict(self.X_val)
    y_pred = np.argmax(y_pred, axis=1)

    recall = recall_score(self.y_val, y_pred, average=None).mean()
    logs['recall'] = recall

    precision = precision_score(self.y_val, y_pred, average=None).mean()
    logs['precision'] = precision

    auc = roc_auc_score(self.y_val, y_pred, average=None).mean()
    logs['auc'] = auc

    f1 = f1_score(self.y_val, y_pred, average=None).mean()
    logs['f1'] = f1
项目:treelstm    作者:nicolaspi    | 项目源码 | 文件源码
def test(self, data, session):
    """Evaluate the model on ``data`` and return the accuracy.

    For every batch, the arg-max of ``self.get_output()`` and the fed labels
    are evaluated in ``session``. Accuracy and the confusion matrix over all
    batches are printed.

    NOTE(review): the ``session.run(`` prefix was reconstructed from a
    truncated source line; confirm against the upstream project.
    """
    ys_true = collections.deque([])
    ys_pred = collections.deque([])
    for batch in data:
        y_pred = tf.argmax(self.get_output(), 1)
        y_true = self.labels
        feed_dict = {self.labels: batch[0].root_labels}
        y_pred, y_true = session.run([y_pred, y_true], feed_dict=feed_dict)
        ys_true += y_true.tolist()
        ys_pred += y_pred.tolist()
    ys_true = list(ys_true)
    ys_pred = list(ys_pred)
    score = metrics.accuracy_score(ys_true, ys_pred)
    # Single-argument print calls behave the same on Python 2 and 3.
    print("Accuracy %s" % (score,))
    #print "Recall", metrics.recall_score(ys_true, ys_pred)
    #print "f1_score", metrics.f1_score(ys_true, ys_pred)
    print("confusion_matrix")
    print(metrics.confusion_matrix(ys_true, ys_pred))
    return score
项目:human-rl    作者:gsastry    | 项目源码 | 文件源码
def metrics(self, X, y):
    """Compute threshold-based classification statistics for ``X`` / ``y``.

    'threshold' is taken from ``self.threshold_from_data(X, y)``, while the
    comparisons below use ``self.threshold``. 'fpr' is added only when ``y``
    contains at least one entry equal to False, so a batch without negatives
    no longer raises ZeroDivisionError. 'precision' and 'recall' are added
    only when the thresholded predictions are mixed.
    """
    stats = {}
    y_pred = self.predict_proba(X)
    stats['threshold'] = self.threshold_from_data(X, y)
    denom = np.count_nonzero(y == False)
    num = np.count_nonzero(np.logical_and(y == False, y_pred >= self.threshold))
    # The false-positive rate is undefined without negative samples.
    if denom > 0:
        stats['fpr'] = float(num) / float(denom)
    y_pred_bool = y_pred >= self.threshold
    if any(y_pred_bool) and not all(y_pred_bool):
        stats['precision'] = precision_score(np.array(y, dtype=np.float32), y_pred_bool)
        stats['recall'] = recall_score(y, y_pred_bool)
    return stats
项目:PersonalizedMultitaskLearning    作者:mitmedialab    | 项目源码 | 文件源码
def computeRecall(preds, true_y):
    """Return the recall of ``preds`` against ``true_y``.

    Returns NaN when ``true_y`` contains no positive (1) sample, because
    recall is ill-defined in that case.
    """
    if 1 not in true_y:
        # Recall is ill-defined without true positive samples; report NaN.
        return np.nan
    return recall_score(true_y, preds)
项目:deepcut    作者:rkcosmos    | 项目源码 | 文件源码
def evaluate(best_processed_path, model):
    """
    Evaluate model on splitted 10 percent testing set

    The test features come from ``prepare_feature(..., option='test')``.
    Model outputs are flattened and thresholded at 0.5 to get binary
    predictions.

    Returns: f1score, precision, recall
    """
    x_test_char, x_test_type, y_test = prepare_feature(best_processed_path, option='test')

    y_predict = model.predict([x_test_char, x_test_type])
    y_predict = (y_predict.ravel() > 0.5).astype(int)

    f1score = f1_score(y_test, y_predict)
    precision = precision_score(y_test, y_predict)
    recall = recall_score(y_test, y_predict)

    return f1score, precision, recall
项目:mitre    作者:gerberlab    | 项目源码 | 文件源码
def leave_one_out_report(combined_results):
    """ Evaluate leave-one-out CV results from different methods.

    Arguments:
    combined_results: list of tuples of the form
    (method_name, true_y_vector, predicted_probabilities_vector)

    Note the vectors really do need to be numpy arrays.

    Returns: formatted report as string

    NOTE(review): closing brackets, the confusion-matrix accumulation and
    the DataFrame ``index`` argument were reconstructed from truncated
    source lines; confirm against the upstream project.
    """
    # Unfortunate code duplication with tabulate_metrics here,
    # to be resolved later
    probability_metrics = [
        ('AUC', roc_auc_score),
        ('AP', metrics.average_precision_score)
    ]
    binary_metrics = [
        ('F1', metrics.f1_score),
        ('MCC', metrics.matthews_corrcoef),
        ('precision', metrics.precision_score),
        ('recall', metrics.recall_score)
    ]
    metric_results = {label: [] for label, _ in
               probability_metrics + binary_metrics}
    metric_results.update({'tn': [], 'fp': [], 'fn': [], 'tp': []})
    # Probability-based metrics use the raw predicted probabilities.
    for label, metric in probability_metrics:
        for fold, y_true, y_pred in combined_results:
            metric_results[label].append(metric(y_true, y_pred))
    # Binary metrics use the probabilities thresholded at 0.5.
    for method, y_true, probabilities in combined_results:
        y_pred = probabilities > 0.5
        for label, metric in binary_metrics:
            metric_results[label].append(metric(y_true, y_pred))
        conf = zip(
            ('tn', 'fp', 'fn', 'tp'),
            metrics.confusion_matrix(y_true, y_pred).flat
        )
        for label, n in conf:
            metric_results[label].append(n)
    # One table row per method, in input order.
    index = [t[0] for t in combined_results]
    table = pd.DataFrame(data=metric_results,
                         index=index)
    report = table.to_string(float_format=lambda x: '%.3g' % x)
    return report
项目:triage    作者:dssg    | 项目源码 | 文件源码
def recall(_, predictions_binary, labels, parameters):
    """Return ``metrics.recall_score`` of the binary predictions.

    The first positional argument is ignored. ``parameters`` is unpacked as
    keyword arguments to ``metrics.recall_score``.
    """
    score = metrics.recall_score(labels, predictions_binary, **parameters)
    return score
项目:trend_ml_toolkit_xgboost    作者:raymon-tian    | 项目源码 | 文件源码
def custom_eval_metirc_recall(preds,dtrain):
    """Recall evaluation metric: returns ``('recall', value)``.

    ``preds`` must lie within [0.0, 1.0]; otherwise an AssertionError is
    raised. Predictions are thresholded at 0.5 and compared with
    ``dtrain.get_label()``.

    NOTE(review): the two range checks were reconstructed as ``np.all(...)``
    from truncated source lines; confirm against the upstream project.
    """
    labels = dtrain.get_label()
    flag1 = np.all(preds<=1.0)
    flag2 = np.all(preds>=0.0)
    flag = flag1*flag2
    assert flag == 1,"??????????"
    preds = preds>=0.5
    preds = preds.astype(int)
    recall = recall_score(labels,preds)
    return 'recall',recall
项目:rdocChallenge    作者:Elyne    | 项目源码 | 文件源码
def getScores(labels_true, labels_pred):
    """Build a multi-line text report of classification and error scores.

    The report lists weighted precision, recall and F1, accuracy, sklearn's
    mean absolute error, and the result of ``mae`` (once in full and once
    as its element at index 1). Every line ends with a newline.
    """
    pieces = [
        "Average Precision: ", str(precision_score(labels_true, labels_pred, average='weighted')), '\n',
        "Average Recall: ", str( recall_score(labels_true, labels_pred, average='weighted')), '\n',
        "Average F1-measure: ", str( f1_score(labels_true, labels_pred, average='weighted')), '\n',
        "Accuracy score: ", str( accuracy_score(labels_true, labels_pred)), '\n',
        "Mean absolute error (sklearn) on the test set is:", str( mean_absolute_error(labels_true, labels_pred)), '\n',
        "Average Mean absolute error, and per class (official): ", str(mae(labels_true, labels_pred)), '\n',
        "Average Mean absolute error (official): ", str(mae(labels_true, labels_pred)[1]), '\n',
    ]
    return "".join(pieces)
项目:sota_sentiment    作者:jbarnesspain    | 项目源码 | 文件源码
def recall(self):
    """Return the recall of the stored predictions against the stored labels.

    ``labels``, ``pos_label`` and ``average`` are passed by keyword. Current
    scikit-learn releases accept them only as keyword arguments, so the
    original positional call fails there.
    """
    return recall_score(self._y_true, self._y_pred,
                        labels=self._labels,
                        pos_label=self._pos_label,
                        average=self._average)
项目:scik-learn-learn-Chinese-text-classider    作者:chapzq77    | 项目源码 | 文件源码
def calculate_3result(actual,predict):
    """Print macro-averaged precision, recall and F1 for the predictions.

    The output strings are kept exactly as found in the source (the
    non-ASCII labels were already replaced by '?' there).
    """
    m_precision = metrics.precision_score(actual,predict,average='macro')
    m_recall = metrics.recall_score(actual,predict,average='macro')
    m_f1 = metrics.f1_score(actual,predict,average='macro')
    # Single-argument print calls behave the same on Python 2 and 3.
    print("?????")
    print("????{0:.3f}".format(m_precision))
    print("????{0:.3f}".format(m_recall))
    print("f1-score:{0:.3f}".format(m_f1))

项目:2in1    作者:XunGuangxu    | 项目源码 | 文件源码
def getScores( true_classes, pred_classes, average):
    """Return (precision, recall, f1, accuracy) for the predicted classes.

    Precision, recall and F1 use the given ``average`` mode; accuracy takes
    no averaging argument.
    """
    averaged_scorers = (
        metrics.precision_score,
        metrics.recall_score,
        metrics.f1_score,
    )
    averaged = tuple(
        scorer( true_classes, pred_classes, average=average )
        for scorer in averaged_scorers
    )
    accuracy = metrics.accuracy_score( true_classes, pred_classes )
    return averaged + (accuracy,)
项目:MultimodalAutoencoder    作者:natashamjaques    | 项目源码 | 文件源码
def compute_all_classification_metrics(preds, true_y):
    """Computes the accuracy, AUC, F1, precision, and recall for the model's predictions.

    Args:
        true_y: The ground truth labels.
        preds: The model's predicted labels.
    Returns: float accuracy, AUC, F1, precision, and recall
    """
    # compute_classification_metric takes the ground truth before the predictions.
    acc = compute_classification_metric(binary_accuracy, true_y, preds)
    auc = compute_classification_metric(roc_auc_score, true_y, preds)
    f1 = compute_classification_metric(f1_score, true_y, preds)
    precision = compute_classification_metric(precision_score, true_y, preds)
    recall = compute_classification_metric(recall_score, true_y, preds)
    return acc, auc, f1, precision, recall
项目:ml-projects    作者:saopayne    | 项目源码 | 文件源码
def evaluate(test_labels, predictions):
    """Print micro- and macro-averaged precision, recall and F1-measure.

    For each averaging mode, the three scores are computed first, then a
    heading line and a line with the scores formatted to four decimals are
    printed.
    """
    sections = (
        ('micro', "Micro-average quality numbers"),
        ('macro', "Macro-average quality numbers"),
    )
    for mode, heading in sections:
        precision = precision_score(test_labels, predictions, average=mode)
        recall = recall_score(test_labels, predictions, average=mode)
        f1 = f1_score(test_labels, predictions, average=mode)

        print(heading)
        print("Precision: {:.4f}, Recall: {:.4f}, F1-measure: {:.4f}".format(precision, recall, f1))
项目:EUSIPCO2017    作者:Veleslavia    | 项目源码 | 文件源码
def report_metrics(self, threshold):
    """Print micro- and macro-averaged precision, recall and F1.

    Scores are computed from ``self.y_true`` and ``self.y_pred``.
    ``threshold`` is only echoed in the heading line.
    """
    for average_strategy in ("micro", "macro"):
        print("{} average strategy, threshold {}".format(average_strategy, threshold))
        p_value = precision_score(self.y_true, self.y_pred, average=average_strategy)
        print("precision:\t{}".format(p_value))
        r_value = recall_score(self.y_true, self.y_pred, average=average_strategy)
        print("recall:\t{}".format(r_value))
        f_value = f1_score(self.y_true, self.y_pred, average=average_strategy)
        print("f1:\t{}".format(f_value))
项目:Stock-SentimentAnalysis    作者:JoshuaMichaelKing    | 项目源码 | 文件源码
def sentiment_lexicon_score(pos_lexicon_dict, neg_lexicon_dict, test, test_tag):
    """Score the sentiment-lexicon classifier on tagged test data.

    Every entry of ``test`` is scored with
    ``rp.sentiment_logarithm_estimation``; the resulting predictions are
    compared against ``test_tag``.

    Args:
        pos_lexicon_dict: positive lexicon, forwarded to the estimator.
        neg_lexicon_dict: negative lexicon, forwarded to the estimator.
        test: list of samples, input type ``[[,], [,], ...]``.
        test_tag: gold tags, one per sample; ``'pos'`` marks the positive
            class, anything else is treated as negative.

    Returns:
        Tuple ``(interval, pos_precision, pos_recall, accuracy)`` where
        ``interval`` is derived from the elapsed time (see note below).

    Raises:
        TypeError: if ``test`` is not a list.
    """
    # Local import so the block does not depend on how the rest of the
    # file imports the datetime module.
    import datetime

    if not isinstance(test, list):
        raise TypeError("There is a type error","input test should be list!")

    starttime = datetime.datetime.now()
    pred = []
    for blog_lst in test:
        score = rp.sentiment_logarithm_estimation(pos_lexicon_dict, neg_lexicon_dict, blog_lst)
        # NOTE(review): the branch bodies were missing in this copy of the
        # source. Reconstructed from the way `pred` is consumed below: one
        # tag per sample, 'pos' for a positive score. The 'neg' label for
        # the other branch is an assumption - confirm against upstream.
        pred.append('pos' if score > 0 else 'neg')

    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    endtime = datetime.datetime.now()
    # NOTE(review): `.microseconds` is only the sub-second component of the
    # timedelta, so runs longer than one second wrap around. Kept as in the
    # original because callers may depend on the current value.
    interval = (endtime - starttime).microseconds
    interval = interval / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)

项目:SentiCR    作者:senticr    | 项目源码 | 文件源码
def ten_fold_cross_validation(dataset,ALGO):
    """Run 10-fold cross-validation of a SentiCR model and average the scores.

    Args:
        dataset: indexable collection of samples exposing ``.text`` and
            ``.rating``; it is indexed with the index arrays produced by
            ``KFold.split`` (assumes a numpy array - TODO confirm).
        ALGO: algorithm identifier forwarded to ``SentiCR(algo=...)``.

    Returns:
        Tuple ``(precision, recall, f1, accuracy)``, each the mean over the
        ten folds. Precision, recall and F1 are computed with
        ``pos_label=-1``.
    """
    kf = KFold(n_splits=10)

    run_precision = []
    run_recall = []
    run_f1score = []
    run_accuracy = []

    # Divide the dataset into 10 partitions (KFold does not shuffle unless
    # asked to). During each iteration one partition is used for testing
    # and the remaining 9 for training.
    # `count` was referenced but never defined; enumerate supplies a
    # 1-based fold number for the progress message.
    for count, (train, test) in enumerate(kf.split(dataset), start=1):
        print("Using split-"+str(count)+" as test data..")
        classifier_model=SentiCR(algo=ALGO,training_data= dataset[train])

        held_out = dataset[test]
        test_comments=[comments.text for comments in held_out]
        test_ratings=[comments.rating for comments in held_out]

        pred = classifier_model.get_sentiment_polarity_collection(test_comments)

        precision = precision_score(test_ratings, pred, pos_label=-1)
        recall = recall_score(test_ratings, pred, pos_label=-1)
        f1score = f1_score(test_ratings, pred, pos_label=-1)
        accuracy = accuracy_score(test_ratings, pred)

        # Record this fold's scores; without this the means below would be
        # taken over empty lists.
        run_precision.append(precision)
        run_recall.append(recall)
        run_f1score.append(f1score)
        run_accuracy.append(accuracy)

    return (mean(run_precision),mean(run_recall),mean(run_f1score),mean(run_accuracy))
项目:MachineLearningProject    作者:ymynem    | 项目源码 | 文件源码
def get_table_values(cats, y_true, y_predicted):
    """Aggregate per-category F1, precision and recall across several runs.

    ``y_true`` and ``y_predicted`` are iterated in lockstep; each pair is
    scored with ``f1``, ``precision`` and ``recall`` using ``average=None``
    and ``labels=cats``, so every call is expected to return one score per
    category, in ``cats`` order (presumably the sklearn scorers imported
    under short aliases - verify against the file's imports).

    Args:
        cats: category labels; also the keys of the returned dict.
        y_true: sequence of ground-truth label collections, one per run.
        y_predicted: sequence of predicted label collections, one per run.

    Returns:
        Dict mapping each category to
        ``{"F1": (mean, std), "precision": (mean, std), "recall": (mean, std)}``.
    """
    def _mean_std(per_run_scores, index):
        # Collect one category's score from every run, then summarise it.
        column = [scores[index] for scores in per_run_scores]
        return mean(column), std(column)

    zipped = list(zip(y_true, y_predicted))
    f1s = [f1(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]
    pres = [precision(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]
    recs = [recall(y_t, y_p, average=None, labels=cats) for y_t, y_p in zipped]

    values = {}
    for i, cat in enumerate(cats):
        values[cat] = {
            "F1": _mean_std(f1s, i),
            "precision": _mean_std(pres, i),
            "recall": _mean_std(recs, i),
        }
    return values
项目:100knock2016    作者:tmu-nlp    | 项目源码 | 文件源码
def get_score(preds, target, test_numbers):
    """Average accuracy, precision, recall and F1 over several test splits.

    Args:
        preds: one sequence of predicted labels per split.
        target: gold labels, indexable by the entries of ``test_numbers``.
        test_numbers: one sequence of sample indices per split, parallel
            to ``preds``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``, each the mean over all
        splits.
    """
    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f_values = []
    for tests, pred in zip(test_numbers, preds):
        # NOTE(review): the body of the inner loop was missing in this copy
        # of the source. Reconstructed as a gold-label lookup, which is the
        # only use that explains the otherwise unused `target` parameter -
        # confirm against upstream.
        answers = [target[test] for test in tests]
        all_accuracy.append(accuracy_score(answers, pred))
        all_precision.append(precision_score(answers, pred))
        all_recall.append(recall_score(answers, pred))
        all_f_values.append(f1_score(answers, pred))
    return np.array(all_accuracy).mean(), np.array(all_precision).mean(), np.array(all_recall).mean(), np.array(all_f_values).mean()
项目:100knock2016    作者:tmu-nlp    | 项目源码 | 文件源码
def cv_prediction(feature_dict, feature, polarity, threshold, folds):
    """Run k-fold cross-validation and accumulate classification scores.

    For every fold the training features/labels are gathered from
    ``feature`` and ``polarity``, each test sample is converted with
    ``fit_feature`` and classified with ``prediction``, and the fold's
    accuracy, precision, recall and F1 are added to running totals.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)`` holding the SUMS over
        all folds; they are not divided by the number of folds here.

    NOTE(review): this copy of the function is damaged (see inline notes):
    one line is syntactically broken and no call that fits ``LR`` or uses
    ``dicvec`` is visible.
    """
    accuracy = 0
    precision = 0
    recall = 0
    f1 = 0
    count = 0
    dicvec = DictVectorizer()
    LR = LogisticRegression()
    # NOTE(review): KFold(n, n_folds=...) iterated directly looks like the
    # legacy sklearn.cross_validation API - confirm the sklearn version.
    kfold = KFold(len(polarity), n_folds=folds)
    for train, test in kfold:
        count += 1
        x = list()
        y = list()
        # List comprehensions used only for their append side effect.
        [x.append(feature[i]) for i in train]
        [y.append(polarity[i]) for i in train]
        # NOTE(review): the next line is garbled (unbalanced parenthesis);
        # presumably the tail of a model-fitting call that took `y` as its
        # last argument was fused onto `y.append(0)` - restore from upstream.
        y.append(0), y)
        test_label = list()
        answer_label = list()
        [answer_label.append(polarity[j]) for j in test]
        for j in test:
            query = fit_feature(feature[j], feature_dict)
            # Falls back to -1 when the query width does not match the
            # feature dictionary size; otherwise asks `prediction`.
            # NOTE(review): `result` is never stored in `test_label` in this
            # copy, so the metrics below would see an empty prediction list.
            result = -1 if query.shape[1] != len(feature_dict) else prediction(LR, query, threshold)
        accuracy += accuracy_score(answer_label, test_label)
        precision += precision_score(answer_label, test_label)
        recall += recall_score(answer_label, test_label)
        f1 += f1_score(answer_label, test_label)
        print('{}_fold finished.'.format(count))

    return accuracy, precision, recall, f1
项目:100knock2016    作者:tmu-nlp    | 项目源码 | 文件源码
def get_score(preds, target, test_numbers):
    """Average accuracy, precision, recall and F1 over several test splits.

    Args:
        preds: one sequence of predicted labels per split.
        target: gold labels, indexable by the entries of ``test_numbers``.
        test_numbers: one sequence of sample indices per split, parallel
            to ``preds``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``, each the mean over all
        splits.
    """
    all_accuracy = []
    all_precision = []
    all_recall = []
    all_f_values = []
    for tests, pred in zip(test_numbers, preds):
        # NOTE(review): the body of the inner loop was missing in this copy
        # of the source. Reconstructed as a gold-label lookup, which is the
        # only use that explains the otherwise unused `target` parameter -
        # confirm against upstream.
        answers = [target[test] for test in tests]
        all_accuracy.append(accuracy_score(answers, pred))
        all_precision.append(precision_score(answers, pred))
        all_recall.append(recall_score(answers, pred))
        all_f_values.append(f1_score(answers, pred))
    return np.array(all_accuracy).mean(), np.array(all_precision).mean(), np.array(all_recall).mean(), np.array(all_f_values).mean()
项目:atap    作者:foxbook    | 项目源码 | 文件源码
def score_models(models, loader):
    """Fit and score each model on every split, yielding one report per model.

    Args:
        models: iterable of pipeline-like objects exposing ``named_steps``
            (with a ``'classifier'`` entry), ``fit`` and ``predict``.
        loader: iterable of ``(X_train, X_test, y_train, y_test)`` splits;
            it is iterated once per model.

    Yields:
        Dict with the model's ``str()`` and display name plus parallel lists
        (one entry per split) under ``'accuracy'``, ``'precision'``,
        ``'recall'``, ``'f1'`` and ``'time'``. Precision, recall and F1 use
        ``average='weighted'``; ``'time'`` covers fit plus predict.
    """
    for model in models:

        name = model.named_steps['classifier'].__class__.__name__
        if 'reduction' in model.named_steps:
            name += " (TruncatedSVD)"

        scores = {
            'model': str(model),
            'name': name,
            'accuracy': [],
            'precision': [],
            'recall': [],
            'f1': [],
            'time': [],
        }

        for X_train, X_test, y_train, y_test in loader:
            start = time.time()
            # NOTE(review): this line was garbled in this copy (only
            # ", y_train)" remained); restored as the fit call that the
            # following predict() requires.
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            scores['time'].append(time.time() - start)
            scores['accuracy'].append(accuracy_score(y_test, y_pred))
            scores['precision'].append(precision_score(y_test, y_pred, average='weighted'))
            scores['recall'].append(recall_score(y_test, y_pred, average='weighted'))
            scores['f1'].append(f1_score(y_test, y_pred, average='weighted'))

        yield scores