Python sklearn.metrics 模块,classification_report() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用sklearn.metrics.classification_report()

项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'penalty': ['l1'],
                             'C': np.logspace(-5,5)},
                             {'penalty': ['l2'],
                              'C': np.logspace(-5,5)}]

        clf = GridSearchCV(linear_model.LogisticRegression(tol=1e-6), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
项目:TrackToTrip    作者:ruipgil    | 项目源码 | 文件源码
def score(train_labels, train_features, test_labels, test_features, save_file, use_tree=False):
    if use_tree:
        train_clf = Classifier(tree.DecisionTreeClassifier())
    else:
        train_clf = Classifier()

    print train_clf.clf
    print ''

    t_start = time.clock()
    train_clf.learn(train_features, train_labels)
    t_end = time.clock()
    if save_file:
        train_clf.save_to_file(open(save_file, 'w'))

    p_start = time.clock()
    predicted = train_clf.clf.predict(test_features)
    p_end = time.clock()

    test_labels_t = train_clf.labels.transform(test_labels)
    print classification_report(test_labels_t, predicted, target_names=train_clf.labels.classes_)
    print 'Training time: %fs' % (t_end - t_start)
    print 'Predicting time: %fs' % (p_end - p_start)
    print 'Mean squared error: %f' % mean_squared_error(test_labels_t, predicted)
    return train_clf.score(test_features, test_labels)
项目:KATE    作者:hugochan    | 项目源码 | 文件源码
def multiclass_classifier(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    clf = softmax_network(X_train.shape[1], Y_train.shape[1])
    clf.fit(X_train, Y_train,
                        epochs=nb_epoch,
                        batch_size=batch_size,
                        shuffle=True,
                        validation_data=(X_val, Y_val),
                        callbacks=[
                                    ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                                    EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
                        ]
                        )
    acc = clf.test_on_batch(X_test, Y_test)[1]
    # confusion matrix and precision-recall
    true = np.argmax(Y_test,axis=1)
    pred = np.argmax(clf.predict(X_test), axis=1)
    print confusion_matrix(true, pred)
    print classification_report(true, pred)
    return acc
项目:UrbanSearch    作者:urbansearchTUD    | 项目源码 | 文件源码
def metrics_equal():
    """Train on the generated "equal" dataset and return its report as JSON.

    Generates and loads the dataset through ``dpu``, splits its ``inputs`` /
    ``outputs`` with ``train_test_split(random_state=42)``, trains an
    ``SGDCModelManager`` and returns ``jsonify(status=200, message=report)``
    where ``report`` is the classification report on the test part.
    """
    dataset = dpu.load(dpu.generate_equal_dataset())
    mm = SGDCModelManager()

    split = train_test_split(dataset['inputs'], dataset['outputs'], random_state=42)
    mm.x_train, mm.x_test, mm.y_train, mm.y_test = split
    mm.train()

    predicts = mm.predict(mm.x_test)
    return jsonify(status=200, message=classification_report(mm.y_test, predicts))
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'kernel': ['rbf'],
                             'gamma': np.logspace(-4, 3, 30),
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]},
                             {'kernel': ['poly'],
                              'degree': [1, 2, 3, 4],
                              'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
                              'coef0': np.logspace(-4, 3, 30)},
                            {'kernel': ['linear'],
                             'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000]}]

        clf = GridSearchCV(svm.SVC(C=1), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'weights': ['uniform', 'distance'],
                             'n_neighbors': range(2,60)
                             }
                            ]


        clf = GridSearchCV(neighbors.KNeighborsClassifier(), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
项目:AirTicketPredicting    作者:junlulocky    | 项目源码 | 文件源码
def parameterChoosing(self):
        # Set the parameters by cross-validation
        tuned_parameters = [{'max_depth': range(20,60),
                             'n_estimators': range(10,40),
                             'max_features': ['sqrt', 'log2', None]
                             }
                            ]

        clf = GridSearchCV(RandomForestClassifier(n_estimators=30), tuned_parameters, cv=5, scoring='precision_weighted')
        clf.fit(self.X_train, self.y_train.ravel())

        print "Best parameters set found on development set:\n"
        print clf.best_params_

        print "Grid scores on development set:\n"
        for params, mean_score, scores in clf.grid_scores_:
            print "%0.3f (+/-%0.03f) for %r\n" % (mean_score, scores.std() * 2, params)

        print "Detailed classification report:\n"
        y_true, y_pred = self.y_test, clf.predict(self.X_test)
        print classification_report(y_true, y_pred)
项目:monasca-analytics    作者:openstack    | 项目源码 | 文件源码
def learn_structure(self, samples):
        """Train a detector on part of ``samples`` and log how it does on the rest.

        ``samples`` is split by ``self._generate_train_test_sets(samples, 0.75)``
        (presumably 0.75 is the training fraction - confirm against that
        helper). The detector returned by ``self._get_best_detector`` predicts
        the test set; the number of predictions equal to ``ANOMALY`` and a
        classification report are logged.

        :param samples: data handed to ``_generate_train_test_sets``
        :return: the trained detector
        """
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        # Lazy %-formatting; this also restores the space that was missing
        # between the training count and the word "samples".
        logger.info('Training with %d samples; testing with %d samples.',
                    len(X_train), len(X_test))

        lr_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = lr_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found %s anomalies in testing set', num_anomalies)

        # The logged text is a classification report, so label it as one
        # (it was previously announced as a confusion matrix).
        logger.info('Classification report: \n%s',
                    classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes']))
        return lr_detector
项目:monasca-analytics    作者:openstack    | 项目源码 | 文件源码
def learn_structure(self, samples):
        """Train a detector on part of ``samples`` and log how it does on the rest.

        ``samples`` is split by ``self._generate_train_test_sets(samples, 0.75)``
        (presumably 0.75 is the training fraction - confirm against that
        helper). The detector returned by ``self._get_best_detector`` predicts
        the test set; the number of predictions equal to ``ANOMALY`` and a
        classification report are logged.

        :param samples: data handed to ``_generate_train_test_sets``
        :return: the trained detector
        """
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        # Lazy %-formatting; this also restores the space that was missing
        # between the training count and the word "samples".
        logger.info('Training with %d samples; testing with %d samples.',
                    len(X_train), len(X_test))

        svc_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = svc_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found %s anomalies in testing set', num_anomalies)

        # The logged text is a classification report, so label it as one
        # (it was previously announced as a confusion matrix).
        logger.info('Classification report: \n%s',
                    classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes']))
        return svc_detector
项目:monasca-analytics    作者:openstack    | 项目源码 | 文件源码
def learn_structure(self, samples):
        """Train a detector on part of ``samples`` and log how it does on the rest.

        ``samples`` is split by ``self._generate_train_test_sets(samples, 0.75)``
        (presumably 0.75 is the training fraction - confirm against that
        helper). The detector returned by ``self._get_best_detector`` predicts
        the test set; the number of predictions equal to ``ANOMALY`` and a
        classification report are logged.

        :param samples: data handed to ``_generate_train_test_sets``
        :return: the trained detector
        """
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        # Lazy %-formatting; this also restores the space that was missing
        # between the training count and the word "samples".
        logger.info('Training with %d samples; testing with %d samples.',
                    len(X_train), len(X_test))

        dt_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = dt_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found %s anomalies in testing set', num_anomalies)

        # The logged text is a classification report, so label it as one
        # (it was previously announced as a confusion matrix).
        logger.info('Classification report: \n%s',
                    classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes']))
        return dt_detector
项目:monasca-analytics    作者:openstack    | 项目源码 | 文件源码
def learn_structure(self, samples):
        """Train a detector on part of ``samples`` and log how it does on the rest.

        ``samples`` is split by ``self._generate_train_test_sets(samples, 0.75)``
        (presumably 0.75 is the training fraction - confirm against that
        helper). The detector returned by ``self._get_best_detector`` predicts
        the test set; the number of predictions equal to ``ANOMALY`` and a
        classification report are logged.

        :param samples: data handed to ``_generate_train_test_sets``
        :return: the trained detector
        """
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        # Lazy %-formatting; this also restores the space that was missing
        # between the training count and the word "samples".
        logger.info('Training with %d samples; testing with %d samples.',
                    len(X_train), len(X_test))

        rf_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = rf_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found %s anomalies in testing set', num_anomalies)

        # The logged text is a classification report, so label it as one
        # (it was previously announced as a confusion matrix).
        logger.info('Classification report: \n%s',
                    classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes']))
        return rf_detector
项目:entity_binding    作者:JasperGuo    | 项目源码 | 文件源码
def main(log_file, table_file):
    """
    Process every logged question and save the questions with a report.

    Each question from ``log_file`` is processed against the table selected
    by its ``"tid"`` key, its truth/prediction sequences are accumulated, and
    the questions plus a five-class classification report are saved next to
    ``log_file`` under the name ``processed_<log file name>``.

    :param log_file: path of the question log to read
    :param table_file: path of the table file to read
    :return: None
    """
    table_dict = build_table_dict(read_tables(table_file))
    questions = read_log(log_file)

    truth, prediction = [], []
    for question in questions:
        process(question, table_dict[question["tid"]])
        gold, guessed = recalc_index(question)
        truth.extend(gold)
        prediction.extend(guessed)

    # Write the output beside the input log, prefixed with "processed_".
    out_dir, log_name = os.path.split(log_file)
    out_path = os.path.join(out_dir, "processed_" + log_name)

    class_names = ["PAT", "LIT", "TAB", "COL", "CELL"]
    report = classification_report(truth, prediction, target_names=class_names)
    save(questions, report, out_path)
项目:traffic-v2    作者:vnetserg    | 项目源码 | 文件源码
def score_model(model, data_test, labeler):
    '''
        ??????? ?????????????????? ??????,
        ?????? ? ??????????? ????? ??? ???????:
        ???????? ?????????, ???????? ??????? ?
        ???????? ??? ??????? ??????, ????????
        ? ????????????? ??????.
        ?????????:
            model - ????????? ??????
            data_test - ??????????? ???????
            labeler - LabelEncoder ?????? ???????
        ??????????:
            ??????
    '''
    X_test = data_test.drop(["proto"], axis=1)
    y_test = data_test["proto"]
    y_predicted = model.predict(X_test)

    true_labels = labeler.inverse_transform(y_test)
    predicted_labels = labeler.inverse_transform(y_predicted)

    print feature_importances_report(model, X_test.columns)
    print "\n", classification_report(true_labels, predicted_labels)
    print cross_class_report(true_labels, predicted_labels)
项目:RIDDLE    作者:jisungk    | 项目源码 | 文件源码
def evaluate(y_test, y_test_proba, nb_classes, path):
    """Print confusion matrix, classification report and ROC AUC values.

    The predicted class of each sample is the argmax of its row in
    ``y_test_proba``. ROC data is computed by ``riddle.roc`` and its plots are
    saved under ``path``. Nothing is returned.
    """
    from riddle import roc # here so np can be seeded before run_pipeline() call

    predicted_classes = list(map(np.argmax, y_test_proba))

    print('Confusion matrix:')
    print(confusion_matrix(y_test, predicted_classes))
    print()

    print('Classification report:')
    print(classification_report(y_test, predicted_classes, digits=3))

    print('ROC AUC values:')
    roc_auc, fpr, tpr = roc.compute_roc(y_test, y_test_proba,
                                        nb_classes=nb_classes)
    roc.save_plots(roc_auc, fpr, tpr, nb_classes=nb_classes, path=path)

    for label, auc_value in roc_auc.items():
        print('  {}: {:.5f}'.format(label, auc_value))
    print()

# ---------------------------- PUBLIC FUNCTIONS ------------------------------ #
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        """Fit an SVC and predict a label for every user in ``self.testDict``.

        The classifier is stored on ``self.classifier`` and trained on
        ``self.trainingSet`` / ``self.trainingLabel``. Each user is predicted
        from a single-feature sample built from ``self.BDS[user]``.

        Returns a dict mapping each user to the result of ``predict``.
        """
        # NOTE: the original source carried disabled experiments with
        # LogisticRegression and DecisionTreeClassifier (each printing a
        # classification report); only the SVC path is active.
        self.classifier = SVC()
        self.classifier.fit(self.trainingSet, self.trainingLabel)

        pred_labels = {}
        for user in self.testDict:
            sample = [[self.BDS[user]]]
            pred_labels[user] = self.classifier.predict(sample)
        return pred_labels
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)

        corpus = [dictionary.doc2bow(text) for text in corpus]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'Logistic:'
        print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'SVM:'
        print classification_report(self.testLabel, pred_labels)
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        corpus = self.trainingSet+self.testSet
        dictionary = corpora.Dictionary(corpus)
        corpus = [dictionary.doc2bow(text) for text in corpus]
        model = models.TfidfModel(corpus)
        corpus = [text for text in model[corpus]]
        text_matrix = gensim.matutils.corpus2dense(corpus, num_terms=len(dictionary.token2id)).T

        if PCA_Applied:
            pca = PCA(n_components=PCA_nComponents)
            text_matrix = pca.fit_transform(text_matrix)

        classifier = LogisticRegression()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'Logistic:'
        print classification_report(self.testLabel, pred_labels)

        classifier = SVC()
        classifier.fit(text_matrix[0:len(self.trainingSet)], self.trainingLabel)
        pred_labels = classifier.predict(text_matrix[len(self.trainingSet):])
        print 'SVM:'
        print classification_report(self.testLabel, pred_labels)
项目:OpinionSpam    作者:Coder-Yu    | 项目源码 | 文件源码
def fitAndPredict(self):
        """Fit an SVC and predict a label for every user in ``self.testDict``.

        The classifier is trained on ``self.trainingSet`` /
        ``self.trainingLabel``. Each user is predicted from a three-feature
        sample built from ``self.MUD``, ``self.RUD`` and ``self.QUD``.

        Returns a dict mapping each user to the result of ``predict``.
        """
        # NOTE: the original source carried disabled experiments with
        # LogisticRegression and DecisionTreeClassifier (each printing a
        # classification report); only the SVC path is active.
        classifier = SVC()
        classifier.fit(self.trainingSet, self.trainingLabel)

        pred_labels = {}
        for user in self.testDict:
            sample = [[self.MUD[user], self.RUD[user], self.QUD[user]]]
            pred_labels[user] = classifier.predict(sample)
        return pred_labels
项目:wende    作者:h404bi    | 项目源码 | 文件源码
def test_model(self, n_folds=10):
        """ Evaluate the model with stratified K-fold cross-validation
            and print timing, confusion matrix, accuracy and a report.
        """
        logging.debug("testing model with {}-folds CV".format(n_folds))
        model = self.init_model()
        features, target = self.data.data, self.data.target

        folds = cross_validation.StratifiedKFold(target, n_folds=n_folds, random_state=42)

        started = time()
        y_pred = cross_validation.cross_val_predict(model, X=features, y=target, n_jobs=-1, cv=folds)
        elapsed = time() - started

        rule_width = 52
        print("=" * rule_width)
        print("time cost: {}".format(elapsed))
        print()
        print("confusion matrix\n", metrics.confusion_matrix(target, y_pred))
        print()
        print("\t\taccuracy: {}".format(metrics.accuracy_score(target, y_pred)))
        print()
        print("\t\tclassification report")
        print("-" * rule_width)
        print(metrics.classification_report(target, y_pred))
项目:MyCommentOnTensorFlowModel    作者:guotong1988    | 项目源码 | 文件源码
def test(self):
        lenW = len(self.vectorizer.vocabulary_)
        W = 3*lenW
        Y_true = []
        Y_pred = []
        for i,line in enumerate(self.test_lines):
            if line['type'] == 'q':
                r = line['answer']
                id = line['id']-1
                indices = [idx for idx in range(i-id, i+1)]
                memory_list = self.L_test[indices]

                m_o1 = O_t([id], memory_list, self.s_Ot)
                m_o2 = O_t([id, m_o1], memory_list, self.s_Ot)

                bestVal = None
                best = None
                for w in self.vectorizer.vocabulary_:
                    val = self.sR([id, m_o1, m_o2], self.H[w], memory_list, self.V)
                    if bestVal is None or val > bestVal:
                        bestVal = val
                        best = w
                Y_true.append(r)
                Y_pred.append(best)
        print metrics.classification_report(Y_true, Y_pred)
项目:HappyCat    作者:sparktsao    | 项目源码 | 文件源码
def MyEvaluation(y_test,predicted):
    def norm_me(x):
        if str(type(x)).find("int")>-1:
            return x
        zix = np.argmax(x)
        x1 = [0]*len(x)
        x1[zix] = 1
        return x1
    predicted = [norm_me(x) for x in predicted]
    predicted = np.array(predicted,dtype="uint8")

    target_names  = ['normal','malware']
    inv_map = {v: k for k, v in KLABEL.items()}
    target_names = [inv_map[x] for x in range(WORKING_KLABEL)]
    result = classification_report(y_test,predicted,target_names=target_names)
    print result

    averagelabel = 'binary'
    if B_MULTICLASS: averaegelabel = "macro"

    v_precision = precision_score(y_test,predicted, average=averagelabel)
    v_recall = recall_score(y_test,predicted, average=averagelabel)    

    (TP, FP, TN, FN) = perf_measure(y_test, predicted,KLABEL["malicious"])
    return v_precision,v_recall,TP, FP, TN, FN
项目:odin    作者:imito    | 项目源码 | 文件源码
def classification_report(y_pred, y_true, labels):
  """
  Build a text summary with accuracy, confusion matrix and sklearn report.

  Parameters
  ----------
  y_pred : predicted labels
  y_true : true labels
  labels : label names; converted with `as_tuple`, their string forms are
    used as target names and their positions as the label ids

  Return
  ------
  Classification report in form of string
  """
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
  # ====== validate labels ====== #
  label_names = as_tuple(labels)
  target_names = [str(name) for name in label_names]
  label_ids = list(range(0, len(label_names)))
  # ====== create report ====== #
  sections = [
      "Accuracy: %f\n" % accuracy_score(y_true, y_pred, normalize=True),
      "Confusion matrix:\n",
      str(confusion_matrix(y_true, y_pred, labels=label_ids)) + '\n',
      "Report:\n",
      str(classification_report(y_true, y_pred, labels=label_ids, digits=3,
                                target_names=target_names)),
  ]
  return "".join(sections)
项目:static-gesture-recognition    作者:windmark    | 项目源码 | 文件源码
def splitValidateModel(self, visualizePredictions = False):
    """Train a distance-weighted k-NN on a split of the feature file and report.

    Data is loaded from ``self.featureFile``; ``1.0 - self.percentSplit`` of
    it is held out for testing. The classification report and confusion
    matrix for the held-out part are printed, and the predictions are
    visualised when ``visualizePredictions`` is true.
    """
    (label_vector, input_vector) = loadData(self.featureFile)

    # Split the sample indices alongside the data so test rows can be traced back.
    sample_indices = range(0, len(input_vector))
    held_out = 1.0 - self.percentSplit
    parts = cross_validation.train_test_split(
      input_vector, label_vector, sample_indices, test_size=held_out)
    trainData, testData, trainLabels, expectedLabels, trainIndices, testIndices = parts

    kNNClassifier = neighbors.KNeighborsClassifier(self.n_neighbors, weights='distance')
    kNNClassifier.fit(trainData, trainLabels)
    predictedLabels = kNNClassifier.predict(testData)

    report = metrics.classification_report(expectedLabels, predictedLabels)
    print("Classification report for classifier %s:\n%s\n"
          % ('k-NearestNeighbour', report))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expectedLabels, predictedLabels))
    print('Split Validation training :: Done.\n')

    if not visualizePredictions:
      return
    self.__visualizePredictedDataset__(input_vector, testIndices, predictedLabels, expectedLabels)
项目:rbm_based_autoencoders_with_tensorflow    作者:ikhlestov    | 项目源码 | 文件源码
def test_svm_estimator(estimator, notes, encodings_train, labels_train,
                       encodings_test, labels_test):
    """Fit ``estimator``, print its test metrics and return summary scores.

    Prints the fitting time together with ``notes``, the accuracy and the
    classification report for the test encodings.

    Returns ``(accuracy, scores)`` where ``scores`` holds the mean of the
    per-class precision, recall and f-score under the keys ``'prec'``,
    ``'recall'`` and ``'f_score'``.
    """
    started = time()
    estimator.fit(encodings_train, labels_train)
    print("Time cons: %.2fs, type: %s" % (time() - started, notes))

    predicted = estimator.predict(encodings_test)
    accuracy = metrics.accuracy_score(labels_test, predicted)
    print("Accuracy: %.5f" % accuracy)
    print(metrics.classification_report(labels_test, predicted))

    per_class = metrics.precision_recall_fscore_support(labels_test, predicted)
    print('-' * 10)

    # The first three entries are per-class precision, recall and f-score.
    summary = {}
    for position, key in enumerate(('prec', 'recall', 'f_score')):
        summary[key] = np.mean(per_class[position])
    return accuracy, summary
项目:keyphrase-extraction    作者:sagarchaturvedi1    | 项目源码 | 文件源码
def classify(y_true, y_pred):
    """Return a classification report over flattened tag sequences.

    Both arguments are iterables of tag sequences. They are flattened and
    binarized with one ``LabelBinarizer`` fitted on the true tags; the report
    covers every tag except ``'O'``, ordered by the part after the first
    ``'-'`` and then by the prefix.
    """
    binarizer = LabelBinarizer()
    y_true_combined = binarizer.fit_transform(list(chain.from_iterable(y_true)))
    y_pred_combined = binarizer.transform(list(chain.from_iterable(y_pred)))

    def entity_then_prefix(tag):
        # 'B-PER' -> ['PER', 'B'], so tags of one entity sort together.
        return tag.split('-', 1)[::-1]

    tagset = sorted(set(binarizer.classes_) - {'O'}, key=entity_then_prefix)
    class_indices = dict((cls, idx) for idx, cls in enumerate(binarizer.classes_))
    report_labels = [class_indices[cls] for cls in tagset]

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels = report_labels,
        target_names = tagset,
    )
项目:productner    作者:etano    | 项目源码 | 文件源码
def evaluate(self, x_test, y_test, batch_size=256):
        """Print a classification report for the model on a test set.

        Args:
            x_test (np.array): 3D numpy array (n_samples, embedding_dim, tokenizer.max_sequence_length)
            y_test (np.array): 2D numpy array (n_samples, len(self.category_map))
            batch_size (int): Batch size passed to ``self.model.predict``
        """
        print('Evaluating...')
        class_scores = self.model.predict(x_test, batch_size=batch_size, verbose=1)
        predicted_classes = np.argmax(class_scores, axis=1)

        # Place each category name at the index the category map assigns to it.
        target_names = [''] * len(self.category_map)
        for category, index in self.category_map.items():
            target_names[index] = category

        y_val = np.argmax(y_test, axis=1)
        report = classification_report(y_val, predicted_classes, target_names=target_names, digits = 6)
        print(report)
项目:rnn-sentiment-analysis    作者:kashizui    | 项目源码 | 文件源码
def evaluate(args, model, data):
    """Print classification reports for the training and dev splits.

    The test split is reported as well when ``args['--evaluate-test']`` is
    truthy. For every split the gold value is element 1 of each target entry
    and the prediction is ``utils.get_sentiment`` of element 1 of each model
    output.
    """
    def report_split(title, prefix):
        # Predict before printing the title, as the output ordering requires.
        outputs = model.predict(getattr(data, prefix + 'X'))
        print(title)
        print(classification_report(
            [e[1] for e in getattr(data, prefix + 'Y')],
            [utils.get_sentiment(e[1]) for e in outputs],
        ))
        print()

    report_split("TRAINING RESULTS", 'train')
    report_split("DEV RESULTS", 'val')
    if args['--evaluate-test']:
        report_split("TEST RESULTS", 'test')
项目:dancedeets-monorepo    作者:mikelambert    | 项目源码 | 文件源码
def eval_model(name, model, data):
    """Fit ``model`` on ``data`` and print its evaluation on the test set.

    Relies on module-level names that are not parameters: ``train``,
    ``test``, ``sample_weights``, ``processed_test_data`` and
    ``grammar_processed_data``.

    Returns ``(model, predictions)`` where ``predictions`` are the model's
    outputs for ``processed_test_data``.
    """
    print '=' * 20
    print name, 'training'
    model.fit(data, train.target, sample_weight=sample_weights)
    print name, 'trained'

    predictions = model.predict(processed_test_data)
    # Fraction of test predictions that equal the target.
    print name, 'accuracy', np.mean(predictions == test.target)

    print(metrics.classification_report(test.target, predictions))
    print metrics.confusion_matrix(test.target, predictions)

    # NOTE(review): cross-validation runs on grammar_processed_data rather
    # than the `data` argument used for fitting - confirm this is intended.
    print name, 'f1 cross validation', cross_validation.cross_val_score(model, grammar_processed_data, train.target, scoring='f1')
    print name, 'precision cross validation', cross_validation.cross_val_score(
        model, grammar_processed_data, train.target, scoring='precision'
    )
    return model, predictions


# SVMs need balanced input features: similar ranges, variances, and so on.
项目:lstm-crf-ner    作者:qfzxhy    | 项目源码 | 文件源码
def bio_classification_report(y_gold,y_pred):
    """Return a classification report over flattened tag sequences.

    :param y_gold: iterable of gold tag sequences, e.g. ``[[...], [...]]``
    :param y_pred: iterable of predicted tag sequences in the same layout
    :return: the sklearn classification report for every tag except ``'O'``,
        ordered by the part after the first ``'-'`` and then by the prefix
    """
    lb = LabelBinarizer()
    y_gold_combined = lb.fit_transform(list(chain.from_iterable(y_gold)))
    # Encode predictions with the binarizer fitted on the gold tags. Refitting
    # here (fit_transform) would rebuild the class list from the predictions,
    # so the two matrices could use different columns for the same tag.
    y_pred_combined = lb.transform(list(chain.from_iterable(y_pred)))

    tagset = set(lb.classes_) - {'O'}
    # 'B-PER' -> ['PER', 'B'], so tags of one entity sort together.
    tagset = sorted(tagset,key=lambda tag: tag.split('-',1)[::-1])
    class_indices = {cls:idx for idx,cls in enumerate(lb.classes_)}
    return classification_report(
        y_gold_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
项目:DialogueBreakdownDetection2016    作者:icoxfog417    | 项目源码 | 文件源码
def classify():
    """Train a detector on the DBD dataset and print a classification report.

    The dataset and labels come from a ``DbdReader``; 20% is held out with
    ``train_test_split(random_state=42)``. Each prediction is a
    ``(label, probability)`` pair of which only the label is kept, and it is
    compared with the ``label`` attribute of the held-out targets.
    """
    reader = DbdReader(
        DATA_DIR,
        TRAIN_PATH,
        target_for_vocabulary=TARGET_PATH,
        max_vocabulary_size=_vocab_size_,
        filter="140",
        threshold=0.6,
        clear_when_exit=False,
    )
    reader.init()
    dataset, user_vocab, system_vocab = reader.get_dataset()

    labels = reader.get_labels()
    model = make_model(user_vocab, system_vocab)
    model_if = model.create_interface(_buckets_, TRAIN_DIR)

    train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

    with tf.Session() as sess:
        detector = Detector(sess, model_if)
        detector.train(sess, train_x, train_t)
        outcomes = [detector.predict(sess, sample) for sample in test_x]
        predicted = [label for label, _probability in outcomes]

    expected = [target.label for target in test_t]
    report = classification_report(expected, predicted, target_names=DbdReader.get_label_names())
    print(report)
项目:DialogueBreakdownDetection2016    作者:icoxfog417    | 项目源码 | 文件源码
def test_detector(self):
        """Train a detector on 80% of the dataset and print a report on the rest.

        Labels are the ``label`` attribute of the reader's label objects; the
        split uses ``train_test_split(test_size=0.2, random_state=42)``.
        """
        dataset, user_vocab, system_vocab = self.Reader.get_dataset()
        labels = [entry.label for entry in self.Reader.get_labels()]
        model = self.make_model(user_vocab, system_vocab)
        model_if = model.create_interface(self.buckets, self.TRAIN_DIR)

        train_x, test_x, train_t, test_t = train_test_split(dataset, labels, test_size=0.2, random_state=42)

        with tf.Session() as sess:
            detector = Detector(sess, model_if)
            detector.train(sess, train_x, train_t)
            # NOTE(review): the raw return value of predict() is used as the
            # predicted label here - confirm it is a bare label and not a
            # (label, probability) pair.
            predicted = [detector.predict(sess, sample) for sample in test_x]

        print(classification_report(test_t, predicted, target_names=DbdReader.get_label_names()))
项目:email-segmentation    作者:gorgias    | 项目源码 | 文件源码
def train_segmenter(self, data, targets, target_names, test=True):
        '''
        Train a support vector machine classifier.

        With ``test`` set, the classifier is fitted on 80% of the data and
        ``(classifier, report)`` is returned, the report being computed on
        the remaining 20%. Otherwise the classifier is fitted on all the
        data and returned alone.
        '''
        X_train, X_test, y_train, y_test = train_test_split(
            data, targets, test_size=0.2, random_state=42)
        svc = SVC(probability=True)

        if not test:
            return svc.fit(data, targets)

        clf = svc.fit(X_train, y_train)
        predictions = clf.predict(X_test)
        report = classification_report(y_test, predictions,
                                       target_names=target_names)
        return clf, report
项目:ZZZZ    作者:Phonicavi    | 项目源码 | 文件源码
def backtestHistory(_initial_virtual_shares, _start_date, _stockcode, _interval,_train_batch_size = 100):
    ZZZZ = Investor(_name='ZZZZ', _initial_virtual_shares=_initial_virtual_shares, _start_date=_start_date, _stockcode=_stockcode, _interval=_interval,_train_batch_size = _train_batch_size)
    total = ZZZZ.maxcnt-ZZZZ.now
    # pbar = ProgressBar(widgets=[' ', AnimatedMarker(), 'Predicting: ', Percentage()], maxval=total).start()
    while ZZZZ.now < ZZZZ.maxcnt:
        # pbar.update(ZZZZ.now)
        # time.sleep(0.01)
        ZZZZ.TradeNext(use_NN=False)
    # pbar.finish()

    print
    print classification_report(ZZZZ.TRUEY, ZZZZ.PREDY)
    f1 = f1_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    accuracy = accuracy_score(ZZZZ.TRUEY, ZZZZ.PREDY)
    print "accuracy:", accuracy
    print "f1: ",f1
    predROR = ZZZZ.getTotalROR()[0]
    realROR = ZZZZ.getTotalROR()[1]
    assert not (realROR == 0)
    print 'pred ROR:', predROR, '%', '\t|\treal ROR:', realROR, '%'

    return predROR, realROR, f1, accuracy, total, ZZZZ.TRAINERROR
项目:snape    作者:mbernico    | 项目源码 | 文件源码
def score_binary_classification(y, y_hat, report=True):
    """
    Score binary predictions and build a text report.

    :param y: true value
    :param y_hat: class 1 probabilities
    :param report: when true, the report string is printed
    :return: ``(score, report_string)`` where ``score`` is the ROC AUC
    """
    # A probability of 0.5 or more counts as class 1 for the classification report.
    hard_labels = [int(probability >= 0.5) for probability in y_hat]

    pieces = ["---Binary Classification Score--- \n"]
    pieces.append(classification_report(y, hard_labels))
    score = roc_auc_score(y, y_hat)
    pieces.append("\nAUC = " + str(score))
    report_string = "".join(pieces)

    if report:
        print(report_string)

    return score, report_string
项目:snape    作者:mbernico    | 项目源码 | 文件源码
def score_multiclass_classification(y, y_hat, report=True):
    """
    Score multiclass predictions and build a text report.

    :param y: true labels
    :param y_hat: predicted labels
    :param report: when true, the report string is printed
    :return: ``(score, report_string)`` where ``score`` is the accuracy
    """
    pieces = ["---Multiclass Classification Score--- \n"]
    pieces.append(classification_report(y, y_hat))
    score = accuracy_score(y, y_hat)
    pieces.append("\nAccuracy = " + str(score))
    report_string = "".join(pieces)

    if report:
        print(report_string)

    return score, report_string
项目:LeaguePredictor    作者:dgarwin    | 项目源码 | 文件源码
def get_save_results(X_train, X_test, y_train, y_test, model, description, params=None):
    # Fit model and log experiment
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    write = description + '\n'
    if hasattr(model, 'best_params_'):
        write += 'Best params: ' + str(model.best_params_) + '\n'
    if params:
        write += 'Params: ' + str(params) + '\n'
    write += 'Training Score: ' + str(model.score(X_train, y_train)) + '\n'
    write += 'Testing Score: ' + str(model.score(X_test, y_test)) + '\n'
    if description == 'NN':
        y_test = pd.DataFrame(y_test).stack()
        y_test = pd.Series(pd.Categorical(y_test[y_test != 0].index.get_level_values(1)))
    write += str(classification_report(y_test, predictions)) + '\n'
    write += str(confusion_matrix(y_test, predictions)) + '\n'
    print write
    with open('notes/experiments', 'a') as f:
        f.write(write)
    return model
项目:ML-note    作者:JasonK93    | 项目源码 | 文件源码
def test_RandomizedSearchCV():
    '''
    Run a randomized search over C and multi_class for LogisticRegression
    on the digits data set and print the search results.
    :return:  None
    '''
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data,
        digits.target,
        test_size=0.25,
        random_state=0,
        stratify=digits.target,
    )

    # C is sampled from an exponential distribution, multi_class from a list.
    search_space = {
        'C': scipy.stats.expon(scale=100),
        'multi_class': ['ovr', 'multinomial'],
    }
    base_estimator = LogisticRegression(penalty='l2', solver='lbfgs', tol=1e-6)
    clf = RandomizedSearchCV(base_estimator, search_space,
                             cv=10, scoring="accuracy", n_iter=100)
    clf.fit(X_train, y_train)

    print("Best parameters set found:", clf.best_params_)
    print("Randomized Grid scores:")
    for candidate, mean_cv_score, fold_scores in clf.grid_scores_:
        spread = fold_scores.std() * 2
        print("\t%0.3f (+/-%0.03f) for %s" % (mean_cv_score, spread, candidate))

    print("Optimized Score:", clf.score(X_test, y_test))
    print("Detailed classification report:")
    predicted = clf.predict(X_test)
    print(classification_report(y_test, predicted))
项目:ScoreCardModel    作者:data-science-tools    | 项目源码 | 文件源码
def Score_to_threshold(clz, X_score, *, y=None, score=100, round_=4):
        """Return the share of scores that lie strictly above a cut-off.

        Every score greater than ``score`` counts as a positive. When ``y``
        is supplied, the macro-averaged precision and the classification
        report of ``y`` against those positives are printed first.

        Attributes:

            X_score (Sequence[number]): - scores to evaluate
            y (Sequence[number]): - reference labels, optional
            score (number): - cut-off; above it is True, otherwise False
            round_ (int): - number of decimals kept in the returned ratio
        """

        score_array = np.array(X_score)
        above_cutoff = score_array > score
        if y is not None:
            print(precision_score(y, above_cutoff, average='macro'))
            print(classification_report(y, above_cutoff))
        positive_ratio = len(score_array[above_cutoff]) / len(score_array)
        return round(positive_ratio, round_)
项目:vangogh    作者:gfolego    | 项目源码 | 文件源码
def eval_perf(classification):
    """Report confusion matrix and classification report for a set of results.

    ``classification`` is a mapping; each key is turned into its true class
    with ``parse_class`` and each value is taken as the predicted class.
    All output goes through ``print_verbose`` with the verbosity level given
    as its second argument.
    """
    y_true = []
    y_pred = []

    # .items() exists on both Python 2 and Python 3 mappings, whereas
    # .iteritems() raises AttributeError on Python 3.
    for key, value in classification.items():
        y_true.append(parse_class(key))
        y_pred.append(value)

        print_verbose("Classification pair: %s" % str((key, value)), 4)
        print_verbose("True classes: %s" % str(y_true), 5)
        print_verbose("Predicted classes: %s" % str(y_pred), 5)

    # Print results
    print_verbose("True classes: %s" % str(y_true), 2)
    print_verbose("Predicted classes: %s" % str(y_pred), 2)

    # Print metrics
    print_verbose("Confusion Matrix:", 0)
    print_verbose(metrics.confusion_matrix(y_true, y_pred), 0)
    print_verbose("Classification Report:", 0)
    print_verbose(metrics.classification_report(y_true, y_pred), 0)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_classification_report_multiclass_with_long_string_label():
    """Check the report layout when one class name is much longer than the others."""
    true_idx, pred_idx, _ = make_prediction(binary=False)

    # Map the integer predictions onto string labels; the middle one is long.
    label_names = np.array(["blue", "green" * 5, "red"])
    y_true = label_names[true_idx]
    y_pred = label_names[pred_idx]

    expected_report = """\
                           precision    recall  f1-score   support

                     blue       0.83      0.79      0.81        24
greengreengreengreengreen       0.33      0.10      0.15        31
                      red       0.42      0.90      0.57        20

              avg / total       0.51      0.53      0.47        75
"""

    assert_equal(classification_report(y_true, y_pred), expected_report)
项目:stance-conditional    作者:sheffieldnlp    | 项目源码 | 文件源码
def __call__(self, sess, epoch, iteration, model, loss):
        """Print a classification report over ``self.batcher``.

        The report is produced only on the first iteration (``iteration == 0``)
        of every ``self.at_every_epoch``-th epoch; otherwise nothing happens.

        :param sess: session used to run the prediction op
        :param epoch: current epoch number
        :param iteration: current iteration within the epoch
        :param model: tensor of class scores fed to softmax / arg-max
        :param loss: unused here
        """
        if iteration == 0 and epoch % self.at_every_epoch == 0:
            # Build the prediction op once. Creating it inside the batch loop
            # would add new nodes to the graph for every batch.
            predict_op = tf.arg_max(tf.nn.softmax(model), 1)
            truth_all = []
            pred_all = []
            for values in self.batcher:
                # The i-th placeholder is fed with the i-th element of the batch.
                feed_dict = {placeholder: values[i]
                             for i, placeholder in enumerate(self.placeholders)}
                # The last element of the batch holds the one-hot labels;
                # argmax turns them back into integer class ids.
                truth = np.argmax(values[-1], 1)
                predicted = sess.run(predict_op, feed_dict=feed_dict)
                truth_all.extend(truth)
                pred_all.extend(predicted)
            print(classification_report(truth_all, pred_all, target_names=["NONE", "AGAINST", "FAVOR"], digits=4))
项目:jingjuPhoneticSegmentation    作者:ronggong    | 项目源码 | 文件源码
def report_cv(clf,fv_test,target_test):
    """Print the search results of ``clf`` and its classification report.

    Shows ``clf.best_params_``, one line per entry of ``clf.grid_scores_``
    and finally the classification report of ``clf.predict(fv_test)``
    against ``target_test``.
    """
    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    for candidate, mean_cv_score, fold_scores in clf.grid_scores_:
        # Spread is shown as two standard deviations of the per-fold scores.
        spread = fold_scores.std() * 2
        print("%0.3f (+/-%0.03f) for %r" % (mean_cv_score, spread, candidate))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    predicted = clf.predict(fv_test)
    print(classification_report(target_test, predicted))
    print()
项目:Sentiment-Analysis    作者:jasonwu0731    | 项目源码 | 文件源码
def on_epoch_end(self, epoch, logs=None):
        """Print a classification report for the evaluation data.

        Predictions of ``self.model`` on ``self.x_eval`` and the targets in
        ``self.y_eval`` are both reduced with an arg-max over axis 1 before
        being compared. Class names are read from ``self.labels`` at the
        indices ``0 .. len(self.labels) - 1``.

        :param epoch: index of the epoch that just finished (unused here)
        :param logs: optional metrics mapping (unused here); defaults to None
            instead of a shared mutable ``{}``
        """
        print("Generating Classification Report:")
        pred = np.argmax(self.model.predict(self.x_eval), axis=1)
        truth = np.argmax(self.y_eval, axis=1)
        # Indexed lookup is kept so that self.labels may be a list or a
        # mapping keyed by class index.
        target_names = [self.labels[i] for i in range(len(self.labels))]
        print(classification_report(truth, pred, target_names=target_names))
项目:scik-learn-learn-Chinese-text-classider    作者:chapzq77    | 项目源码 | 文件源码
def predict_result_report(actual,predict,catetory):
    """Print the classification report of ``predict`` against ``actual``.

    ``catetory`` supplies the display names of the classes.
    """
    report_text = metrics.classification_report(
        actual, predict, target_names=catetory)
    print(report_text)
项目:UrbanSearch    作者:urbansearchTUD    | 项目源码 | 文件源码
def train_test_equal():
    """Train and evaluate an SGDCModelManager on a freshly generated equal dataset.

    The dataset's 'inputs' and 'outputs' are split into train and test parts,
    the manager is trained, its score and a classification report for the
    test part are printed, and the score is returned in a JSON response.
    """
    dataset = dpu.load(dpu.generate_equal_dataset())
    mm = SGDCModelManager()

    split = train_test_split(dataset['inputs'], dataset['outputs'], random_state=42)
    mm.x_train, mm.x_test, mm.y_train, mm.y_test = split
    mm.train()
    score = mm.score()

    predictions = mm.predict(mm.x_test)
    print(mm.score())
    print(classification_report(mm.y_test, predictions))

    return jsonify(status=200, score=score)
项目:sentiment-analysis    作者:lplping    | 项目源码 | 文件源码
def Precision(clf):
    """Print accuracy figures and a classification report for ``clf``.

    Uses the module-level ``x_test`` / ``y_test`` data. Three things are
    printed: the fraction of predictions equal to ``y_test``, a
    classification report built from the probabilities in column 1 of
    ``predict_proba`` thresholded at 0.5, and the accuracy score of the
    hard predictions.

    :param clf: fitted classifier exposing ``predict`` and ``predict_proba``
    """
    from sklearn.metrics import accuracy_score

    doc_class_predicted = clf.predict(x_test)
    # Fraction of test samples whose predicted class equals the label.
    print(np.mean(doc_class_predicted == y_test))
    # Turn the probability in column 1 into a boolean decision at 0.5.
    # (The original also computed a precision/recall curve from a second
    # predict() call and discarded the result; that dead work is removed.)
    answer = clf.predict_proba(x_test)[:, 1]
    report = answer > 0.5
    print(classification_report(y_test, report, target_names=['neg', 'pos']))
    print("--------------------")
    # NOTE(review): the label text below appears mis-encoded in this copy;
    # restore the original wording from the upstream source if available.
    print('???: %.2f' % accuracy_score(y_test, doc_class_predicted))
项目:bguFinalProject    作者:liranfar    | 项目源码 | 文件源码
def print_confusion_matrix(y_test, nb_predict_test):
    """Print the confusion matrix and classification report of the predictions.

    Both are computed for the labels 'malware' and 'benign', in that order.
    """
    class_order = ['malware', 'benign']

    print("Confusion Matrix")
    matrix = metrics.confusion_matrix(y_test, nb_predict_test, labels=class_order)
    print("{0}".format(matrix))
    print("")
    print("Classification Report")
    report_text = metrics.classification_report(y_test, nb_predict_test, labels=class_order)
    print(report_text)
项目:Sensor-Specific-Hyperspectral-Image-Feature-Learning    作者:MeiShaohui    | 项目源码 | 文件源码
def get_metric(self):
        """Compute classification metrics and store them on the instance.

        After calling ``self.get_y_pred()``, the true labels are taken from
        ``self.label`` and the predictions from the arg-max of
        ``self.feature`` along axis 1. The following attributes are set:
        ``classify_report``, ``confusion_matrix``, ``overall_accuracy``,
        ``acc_for_each_class`` and ``average_accuracy``. The overall accuracy
        is printed at the end.
        """
        self.get_y_pred()
        #self.get_ip1()
        self.y_true = self.label
        self.y_pred = self.feature.argmax(1)
        self.classify_report = metrics.classification_report(self.y_true, self.y_pred)
        self.confusion_matrix = metrics.confusion_matrix(self.y_true, self.y_pred)
        self.overall_accuracy = metrics.accuracy_score(self.y_true, self.y_pred)
        # NOTE(review): the per-class figures are precision values
        # (precision_score with average=None) although the attribute is named
        # acc_for_each_class - confirm this is the intended metric.
        self.acc_for_each_class = metrics.precision_score(self.y_true, self.y_pred, average=None)
        self.average_accuracy = np.mean(self.acc_for_each_class)
        # Reuse the stored value instead of recomputing it; the call form of
        # print is valid on both Python 2 and Python 3.
        print(self.overall_accuracy)
项目:bionlp17    作者:leebird    | 项目源码 | 文件源码
def evaluate(self, test_examples, test_labels):
        """Predict labels for ``test_examples`` and print the classification report against ``test_labels``."""
        report_text = classification_report(test_labels, self.predict(test_examples))
        print(report_text)
项目:tefla    作者:litan    | 项目源码 | 文件源码
def get_metrics(actual_labels_file, predict_labels_file):
    """Compare predicted labels with actual labels and print evaluation metrics.

    Both arguments are CSV files read with pandas; their first row is treated
    as a header and the columns are renamed to ``image`` and ``label``. Rows
    are matched by sorting both tables on ``image``. The confusion matrix,
    classification report, accuracy and quadratic weighted kappa are printed.

    :param actual_labels_file: CSV file holding the actual label per image
    :param predict_labels_file: CSV file holding the predicted label per image
    """
    util.check_required_program_args([actual_labels_file, predict_labels_file])
    actual_labels_df = pd.read_csv(actual_labels_file, names=['image', 'label'], header=0)
    predict_labels_df = pd.read_csv(predict_labels_file, names=['image', 'label'], header=0)

    # Both files must contain the same number of items.
    assert (actual_labels_df['image'].count()) == predict_labels_df['image'].count()

    # Sort both tables by image so that row i refers to the same image in each,
    # then verify that the image columns really are identical.
    actual_labels_df = actual_labels_df.sort_values(by=['image'])
    predict_labels_df = predict_labels_df.sort_values(by=['image'])
    assert (list(actual_labels_df['image'].values) == list(predict_labels_df['image'].values))

    # y_true and y_pred are aligned row by row thanks to the sort and check above.
    # y_labels (the image identifiers) is not referenced again in the lines shown here.
    y_labels = actual_labels_df['image'].values
    y_true = actual_labels_df['label'].values
    y_pred = predict_labels_df['label'].values

    print "Confusion matrix:"
    print confusion_matrix(y_true, y_pred)
    print ""
    print "Classification report:"
    print classification_report(y_true, y_pred)

    accuracy = accuracy_score(y_true, y_pred)
    kappa = quadratic_weighted_kappa(y_true, y_pred)

    print('Accuracy: %.4f' % accuracy)
    print('Kappa: %.4f' % kappa)
    print ""