Python sklearn.metrics module: log_loss() example source code

We have extracted the following 49 code examples from open-source Python projects to illustrate how to use sklearn.metrics.log_loss().
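
Before the project examples, here is a minimal self-contained sketch of the basic call (the data below is illustrative): log_loss takes true labels and predicted probabilities and returns the cross-entropy.

from sklearn.metrics import log_loss

# Binary case: y_prob holds P(class == 1) for each sample.
y_true = [0, 1, 1, 0]
y_prob = [0.1, 0.8, 0.65, 0.3]
print(log_loss(y_true, y_prob))          # ~0.28

# Multiclass case: one row of probabilities per sample, one column per class.
y_true_mc = [0, 2, 1]
y_prob_mc = [[0.7, 0.2, 0.1],
             [0.1, 0.2, 0.7],
             [0.2, 0.6, 0.2]]
print(log_loss(y_true_mc, y_prob_mc))    # ~0.41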

Project: mlbootcamp_5    Author: ivan-filonov    | Project source | File source
def rf1(train2, y, test2, v, z):
    cname = sys._getframe().f_code.co_name
    v[cname], z[cname] = 0, 0
    N_splits = 300
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    for n, (itrain, ival) in enumerate(skf.split(train2, y)):
        print('step %d of %d'%(n+1, skf.n_splits), now())
        clf = ensemble.RandomForestRegressor(n_estimators=1000,
                                             max_depth=3,
                                             random_state=13)
        clf.fit(train2[itrain], y[itrain])

        p = clf.predict(train2[ival])
        v.loc[ival, cname] += p
        score = metrics.log_loss(y[ival], p)
        z[cname]  += np.log1p(clf.predict(test2))
        print(cname, 'step %d: score'%(n+1), score, now())
        scores.append(score)

    print('validation loss: ', metrics.log_loss(y, v[cname]))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
    z[cname] /= N_splits
Project: stacked_generalization    Author: fukatani    | Project source | File source
def test_stacked_classfier_extkfold(self):
        bclf = LogisticRegression(random_state=1)
        clfs = [RandomForestClassifier(n_estimators=40, criterion = 'gini', random_state=1),
                RidgeClassifier(random_state=1),
                ]
        sl = StackedClassifier(bclf,
                               clfs,
                               n_folds=3,
                               verbose=0,
                               Kfold=StratifiedKFold(self.iris.target, 3),
                               stack_by_proba=False,
                               oob_score_flag=True,
                               oob_metrics=log_loss)
        sl.fit(self.iris.data, self.iris.target)
        score = sl.score(self.iris.data, self.iris.target)
        self.assertGreater(score, 0.9, "Failed with score = {0}".format(score))
Project: kaggle_airbnb    Author: svegapons    | Project source | File source
def opt_2_obj_func(w, X, y, n_class):
    """
    Function to be minimized in the EN_OPT_2 ensembler.
    In this case there is only one weight for each classification result to be
    combined.
    Parameters:
    ----------
    w: ndarray size=(n_preds)
       Candidate solution to the optimization problem (vector of weights).
    X: ndarray size=(n_samples, n_preds * n_class)
       Solutions to be combined horizontally concatenated.
    y: ndarray size=(n_samples,)
       Class labels
    n_class: int
       Number of classes in the problem, i.e. = 12
    """
    w = np.abs(w)
    sol = np.zeros((X.shape[0], n_class))
    for i in range(len(w)):
        sol += X[:, i*n_class:(i+1)*n_class] * w[i]
    #Minimizing the logloss   
    sc_ll = log_loss(y, sol)
    return sc_ll
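
opt_2_obj_func is only the objective function; the sketch below shows how it is typically evaluated and then handed to scipy.optimize.minimize. The toy arrays, the equal starting weights, and the Nelder-Mead call are illustrative assumptions, not part of the original project.

import numpy as np

# Hypothetical toy data: two base models, three classes, five samples.
p1 = np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.2, 0.2, 0.6], [0.5, 0.3, 0.2], [0.1, 0.3, 0.6]])
p2 = np.array([[0.6, 0.3, 0.1], [0.2, 0.7, 0.1], [0.1, 0.3, 0.6], [0.4, 0.4, 0.2], [0.2, 0.2, 0.6]])
X_stack = np.hstack([p1, p2])            # per-model class probabilities concatenated horizontally
y = np.array([0, 1, 2, 0, 2])
n_class, n_preds = 3, 2

w0 = np.ones(n_preds) / n_preds          # equal weights keep the blended rows summing to 1
print(opt_2_obj_func(w0, X_stack, y, n_class))

# A driver would then minimize the objective over the weights, e.g.:
# from scipy.optimize import minimize
# res = minimize(opt_2_obj_func, w0, args=(X_stack, y, n_class), method='Nelder-Mead')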
Project: KaggleExeter    Author: detomo    | Project source | File source
def cross_validate(train):
    #separate training and validation set
    X_train,X_valid= split_train_validation(train)
    scores = []; preds = []
    for i in xrange(len(X_train)):
        #convert X_train, Y_train etc... to xgboost matrix
        dtrain = xgb.DMatrix(X_train[i][['phone_brand','device_model','timestamp']], label = X_train[i]['group'],missing=np.nan) 
        dvalid = xgb.DMatrix(X_valid[i][['phone_brand','device_model','timestamp']], label = X_valid[i]['group'],missing=np.nan)

        #predict with xgboost
        parameters = {'max_depth':4,'eta':0.1,'silent':1, 'subsample':0.8,'colsample_bytree':0.8,
                'objective':'multi:softprob','booster':'gbtree','early_stopping_rounds':50,
                'num_class':12,'num_boost_round':1000,'eval_metric':'mlogloss'}
        plst = parameters.items()
        bst = xgb.train(plst, dtrain)
        pred = bst.predict(dvalid)

        scores.append(log_loss(X_valid[i]['group'].tolist(),pred))
        pred = pd.DataFrame(pred, index = X_valid[i].index, columns=target_encoder.classes_)
        preds.append(pred)
    return scores, preds
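
One detail worth noting: in the snippet above, num_boost_round and early_stopping_rounds sit inside the parameter dict, where xgb.train does not read them, so training falls back to the default number of rounds without early stopping. Below is a hedged sketch of the more conventional call, reusing dtrain and dvalid as built above.

params = {'max_depth': 4, 'eta': 0.1, 'silent': 1, 'subsample': 0.8, 'colsample_bytree': 0.8,
          'objective': 'multi:softprob', 'num_class': 12, 'eval_metric': 'mlogloss'}
bst = xgb.train(params, dtrain,
                num_boost_round=1000,             # passed explicitly rather than inside params
                evals=[(dvalid, 'valid')],        # watchlist required for early stopping
                early_stopping_rounds=50)
pred = bst.predict(dvalid)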
Project: tencent_social_algo    Author: Folieshell    | Project source | File source
def check_log_loss(max_depth, n_splits, test_size):
    model = RandomForestClassifier(max_depth=max_depth, n_jobs=-1, random_state=777)
    trn_scores = []
    vld_scores = []
    sss = StratifiedShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=777)
    for i, (t_ind, v_ind) in enumerate(sss.split(feature_train, trainY)):
        print('# Iter {} / {}'.format(i + 1, n_splits))
        x_trn = feature_train.values[t_ind]
        y_trn = trainY[t_ind]
        x_vld = feature_train.values[v_ind]
        y_vld = trainY[v_ind]

        model.fit(x_trn, y_trn)

        score = log_loss(y_trn, model.predict_proba(x_trn))
        trn_scores.append(score)

        score = log_loss(y_vld, model.predict_proba(x_vld))
        vld_scores.append(score)

    print("max_depth: %d   n_splits: %d    test_size: %f" % (max_depth, n_splits, test_size))
    print('# TRN logloss: {}'.format(np.mean(trn_scores)))
    print('# VLD logloss: {}'.format(np.mean(vld_scores)))
Project: bnp    Author: mpearmain    | Project source | File source
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Project: bnp    Author: mpearmain    | Project source | File source
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Project: bnp    Author: mpearmain    | Project source | File source
def runET(train_X, train_y, test_X, test_y=None, validation=1, n_est_val=50, depth_val=None, split_val=2, leaf_val=1, feat_val='auto', jobs_val=4, random_state_val=0):
        clf = ensemble.ExtraTreesClassifier(
                n_estimators = n_est_val,
                max_depth = depth_val,
                min_samples_split = split_val,
                min_samples_leaf = leaf_val,
                max_features = feat_val,
                criterion='entropy',
                n_jobs = jobs_val,
                random_state = random_state_val)
        clf.fit(train_X, train_y)
        pred_train_y = clf.predict_proba(train_X)[:,1]
        pred_test_y = clf.predict_proba(test_X)[:,1]

        if validation:
                train_loss = log_loss(train_y, pred_train_y)
                loss = log_loss(test_y, pred_test_y)
                print "Train, Test loss : ", train_loss, loss
                return pred_test_y, loss
        else:
                return pred_test_y
Project: bnp    Author: mpearmain    | Project source | File source
def extratreescv(n_estimators,
                 min_samples_split,
                 min_samples_leaf,
                 max_features,
                 max_depth,
                 min_weight_fraction_leaf
                 ):

    clf = ExtraTreesClassifier(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               min_samples_leaf=int(min_samples_leaf),
                               max_features= int(max_features),
                               max_depth = int(max_depth),
                               min_weight_fraction_leaf = min_weight_fraction_leaf,
                               n_jobs=-1,
                               random_state=1234,
                               verbose=1)

    clf.fit(x0, y0)
    ll = -log_loss(y1, clf.predict_proba(x1)[:,1])
    return ll
Project: mlbootcamp_5    Author: ivan-filonov    | Project source | File source
def xgb_base(train2, y, test2, v, z, xgb_params, N_splits, N_seeds, cname, base_seed=42):
    v[cname], z[cname] = 0, 0
    scores = []
    skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
    dtest = xgb.DMatrix(test2)
    for s in range(N_seeds):
        xgb_params['seed'] = s + base_seed
        for n, (itrain, ival) in enumerate(skf.split(train2, y)):
            dtrain = xgb.DMatrix(train2.ix[itrain], y[itrain])
            dvalid = xgb.DMatrix(train2.ix[ival], y[ival])
            watch = [(dtrain, 'train'), (dvalid, 'valid')]
            clf = xgb.train(xgb_params, dtrain, 10000, watch, early_stopping_rounds=100, verbose_eval=False)

            p = clf.predict(dvalid)
            v.loc[ival, cname] += pconvert(p)
            score = metrics.log_loss(y[ival], p)
            z[cname]  += pconvert(clf.predict(dtest))
            print(cname, 'seed %d step %d of %d: '%(xgb_params['seed'], n+1, skf.n_splits), score, now())
            scores.append(score)

    z[cname] /= N_splits * N_seeds
    v[cname] /= N_seeds
    print('validation loss: ', metrics.log_loss(y, prestore(v[cname])))
    cv=np.array(scores)
    print(cv, cv.mean(), cv.std())
Project: AutoML5    Author: djajetic    | Project source | File source
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
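
The normalization at the end of pac_metric is easy to check by hand; the small numeric sketch below uses illustrative loss values, not numbers from the original project.

import numpy as np

the_log_loss = 0.35        # log loss of the model being scored
base_log_loss = 0.69       # log loss of always predicting the class prior (~ln 2 for a balanced binary task)

pac = np.exp(-the_log_loss)           # accuracy-like score of the model
base_pac = np.exp(-base_log_loss)     # accuracy-like score of the prior baseline
score = (pac - base_pac) / max(1e-15, 1 - base_pac)
print(score)                          # ~0.41: 0 means no better than the prior, 1 means a perfect model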
Project: AutoML5    Author: djajetic    | Project source | File source
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
Project: AutoML5    Author: djajetic    | Project source | File source
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Project: hyperband    Author: zygmuntz    | Project source | File source
def train_and_eval_sklearn_classifier( clf, data ):

    x_train = data['x_train']
    y_train = data['y_train']

    x_test = data['x_test']
    y_test = data['y_test'] 

    clf.fit( x_train, y_train ) 

    try:
        p = clf.predict_proba( x_train )[:,1]   # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_train )

    ll = log_loss( y_train, p )
    auc = AUC( y_train, p )
    acc = accuracy( y_train, np.round( p ))

    print "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc )

    #

    try:
        p = clf.predict_proba( x_test )[:,1]    # sklearn convention
    except IndexError:
        p = clf.predict_proba( x_test )

    ll = log_loss( y_test, p )
    auc = AUC( y_test, p )
    acc = accuracy( y_test, np.round( p ))

    print "# testing  | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ) 

    #return { 'loss': 1 - auc, 'log_loss': ll, 'auc': auc }
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }

###

# "clf", even though it's a regressor
Project: human-rl    Author: gsastry    | Project source | File source
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Project: human-rl    Author: gsastry    | Project source | File source
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Project: human-rl    Author: gsastry    | Project source | File source
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Project: human-rl    Author: gsastry    | Project source | File source
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Project: human-rl    Author: gsastry    | Project source | File source
def predict_proba_with_loss(self, X, y):
        y_pred = self.predict_proba(X)
        loss = log_loss(y,y_pred)
        return y_pred, loss

    # smallest prob given to an actual catastrophe
Project: AutoML4    Author: djajetic    | Project source | File source
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Project: AutoML4    Author: djajetic    | Project source | File source
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
Project: AutoML4    Author: djajetic    | Project source | File source
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Project: automl_gpu    Author: abhishekkrthakur    | Project source | File source
def pac_metric (solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric. 
    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.''' 
    debug_flag=False
    [sample_num, label_num] = solution.shape
    if label_num==1: task='binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)
    # Compute the base log loss (using the prior probabilities)    
    pos_num = 1.* sum(solution) # float conversion!
    frac_pos = pos_num / sample_num # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)
    # Alternative computation of the same thing (slower)    
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num): base_prediction[k,:] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)  
        diff = np.array(abs(the_base_log_loss-base_log_loss))
        if len(diff.shape)>0: diff=max(diff)
        if(diff)>1e-10: 
            print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp 
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss)) 
    base_pac = mvmean(np.exp(-the_base_log_loss))
    # Normalize: 0 for random, 1 for perfect    
    score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
    return score
Project: automl_gpu    Author: abhishekkrthakur    | Project source | File source
def log_loss(solution, prediction, task = 'binary.classification'):
    ''' Log loss for binary and multiclass. '''
    [sample_num, label_num] = solution.shape
    eps = 1e-15

    pred = np.copy(prediction) # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num>1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        for k in range(sample_num):
            pred[k,:] /= sp.maximum (norma[k], eps) 
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case

    # Bounding of predictions to avoid log(0),1/0,...
    pred = sp.minimum (1-eps, sp.maximum (eps, pred))
    # Compute the log loss    
    pos_class_log_loss = - mvmean(sol*np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num==1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = - mvmean((1-sol)*np.log(1-pred), axis=0)
        log_loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem, so we average.
        # The probabilities in one line do not add up to one.
        # log_loss = mvmean(log_loss) 
        # print('binary {}'.format(log_loss))
        # In the multilabel case, the right thing is to AVERAGE, not sum
        # We return all the scores so we can normalize correctly later on
    else:
        # For the multiclass case the probabilities in one line add up to one.
        log_loss = pos_class_log_loss
        # We sum the contributions of the columns.
        log_loss = np.sum(log_loss) 
        #print('multiclass {}'.format(log_loss))
    return log_loss
Project: automl_gpu    Author: abhishekkrthakur    | Project source | File source
def log_loss_(solution, prediction):
    return metrics.log_loss(solution, prediction)
Project: DeepFM    Author: dwt0317    | Project source | File source
def predict_test_file(preds, sess, test_file, feature_cnt, _indices, _values, _values2, _cont_values, _text_values, _shape,
                      _cont_shape, _text_shape, _y, _ind, epoch, batch_size, tag, path, output_prediction=True):
    day = date.today()
    if output_prediction:
        wt = open(path + '/'+str(day)+'_deepFM_pred_' + tag + str(epoch) + '.txt', 'w')

    gt_scores = []
    pred_scores = []

    for test_input_in_sp in load_data_cache(test_file):
        predictions = sess.run(preds, feed_dict={
            _indices: test_input_in_sp['indices'], _values: test_input_in_sp['values'],
            _shape: test_input_in_sp['shape'], _cont_shape: test_input_in_sp['cont_shape'],
            _text_values: test_input_in_sp['text_values'], _text_shape: test_input_in_sp['text_shape'],

            _y: test_input_in_sp['labels'], _values2: test_input_in_sp['values2'],
            _cont_values: test_input_in_sp['cont_values'], _ind: test_input_in_sp['feature_indices']
        }).reshape(-1).tolist()

        if output_prediction:
            for (gt, preded) in zip(test_input_in_sp['labels'].reshape(-1).tolist(), predictions):
                wt.write('{0:d},{1:f}\n'.format(int(gt), preded))
                gt_scores.append(gt)
                # pred_scores.append(1.0 if preded >= 0.5 else 0.0)
                pred_scores.append(preded)
        else:
            gt_scores.extend(test_input_in_sp['labels'].reshape(-1).tolist())
            pred_scores.extend(predictions)
    auc = metrics.roc_auc_score(np.asarray(gt_scores), np.asarray(pred_scores))
    logloss = metrics.log_loss(np.asarray(gt_scores), np.asarray(pred_scores))
    # print('auc is ', auc, ', at epoch  ', epoch)
    if output_prediction:
        wt.close()
    return auc, logloss
Project: aboleth    Author: data61    | Project source | File source
def print_k_result(ys, Ep, ll, acc, name):
    acc.append(accuracy_score(ys, Ep.argmax(axis=1)))
    ll.append(log_loss(ys, Ep))
    print("{}: accuracy = {:.4g}, log-loss = {:.4g}"
          .format(name, acc[-1], ll[-1]))
Project: Quantrade    Author: quant-trade    | Project source | File source
def main():
    validate = True
    n = SData(validate=validate)

    Xtrain = n.train_features.as_matrix()
    ytrain = n.train_targets
    Xtest = n.test_features.as_matrix()
    ytest = n.test_targets

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], Xtrain.shape[1], 1))
    Xtest  = np.reshape(Xtest, (Xtest.shape[0],  Xtest.shape[1], 1))

    rnn = RNN([1, 100, 100, 1])
    rnn.fit(Xtrain, ytrain)
    p = rnn.predict(Xtest)
    p_prob = rnn.predict(Xtest)

    if validate:
        mse = mean_squared_error(ytest, p)
        print("MSE: {}".format(mse))
        loss = log_loss(ytest, p_prob)
        print("Log loss: {}".format(loss))
    else:
        base_path = dirname(__file__)
        results_df = DataFrame(data={'probability':results})
        joined = DataFrame(t_id).join(results_df)
        joined.to_csv(join(base_path, 'results', 'dl.csv'), index=False)
Project: molearn    Author: jmread    | Project source | File source
def Log_loss(Ytest,Ydist):
    return log_loss(Ytest, Ydist, eps=1e-15, normalize=True)
#    N_test,L = Ytest.shape
#    return sum((Ytest == Ypred) * 1.) / N_test / L
Project: nfm    Author: faychu    | Project source | File source
def parse_args():
    parser = argparse.ArgumentParser(description="Run FM.")
    parser.add_argument('--path', nargs='?', default='./data/',
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='frappe',
                        help='Choose a dataset.')
    parser.add_argument('--epoch', type=int, default=100,
                        help='Number of epochs.')
    parser.add_argument('--pretrain', type=int, default=-1,
                        help='flag for pretrain. 1: initialize from pretrain; 0: randomly initialize; -1: save the model to pretrain file')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='Batch size.')
    parser.add_argument('--hidden_factor', type=int, default=64,
                        help='Number of hidden factors.')
    parser.add_argument('--lamda', type=float, default=0,
                        help='Regularizer for bilinear part.')
    parser.add_argument('--keep_prob', type=float, default=0.5, 
                    help='Keep probability (1-dropout_ratio) for the Bi-Interaction layer. 1: no dropout')
    parser.add_argument('--lr', type=float, default=0.05,
                        help='Learning rate.')
    parser.add_argument('--loss_type', nargs='?', default='square_loss',
                        help='Specify a loss type (square_loss or log_loss).')
    parser.add_argument('--optimizer', nargs='?', default='AdagradOptimizer',
                        help='Specify an optimizer type (AdamOptimizer, AdagradOptimizer, GradientDescentOptimizer, MomentumOptimizer).')
    parser.add_argument('--verbose', type=int, default=1,
                        help='Show the results per X epochs (0, 1 ... any positive integer)')
    parser.add_argument('--batch_norm', type=int, default=0,
                    help='Whether to perform batch normalization (0 or 1)')

    return parser.parse_args()
Project: nfm    Author: faychu    | Project source | File source
def evaluate(self, data):  # evaluate the results for an input set
        num_example = len(data['Y'])
        feed_dict = {self.train_features: data['X'], self.train_labels: [[y] for y in data['Y']], self.dropout_keep: 1.0, self.train_phase: False}
        predictions = self.sess.run((self.out), feed_dict=feed_dict)
        y_pred = np.reshape(predictions, (num_example,))
        y_true = np.reshape(data['Y'], (num_example,))
        if self.loss_type == 'square_loss':    
            predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true))  # bound the lower values
            predictions_bounded = np.minimum(predictions_bounded, np.ones(num_example) * max(y_true))  # bound the higher values
            RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))
            return RMSE
        elif self.loss_type == 'log_loss':
            logloss = log_loss(y_true, y_pred) # I haven't checked the log_loss
            return logloss
Project: nfm    Author: faychu    | Project source | File source
def parse_args():
    parser = argparse.ArgumentParser(description="Run Neural FM.")
    parser.add_argument('--path', nargs='?', default='../data/',
                        help='Input data path.')
    parser.add_argument('--dataset', nargs='?', default='frappe',
                        help='Choose a dataset.')
    parser.add_argument('--epoch', type=int, default=200,
                        help='Number of epochs.')
    parser.add_argument('--pretrain', type=int, default=0,
                        help='Pre-train flag. 0: train from scratch; 1: load from pretrain file')
    parser.add_argument('--batch_size', type=int, default=128,
                        help='Batch size.')
    parser.add_argument('--hidden_factor', type=int, default=64,
                        help='Number of hidden factors.')
    parser.add_argument('--layers', nargs='?', default='[64]',
                        help="Size of each layer.")
    parser.add_argument('--keep_prob', nargs='?', default='[0.8,0.5]', 
                        help='Keep probability (i.e., 1-dropout_ratio) for each deep layer and the Bi-Interaction layer. 1: no dropout. Note that the last index is for the Bi-Interaction layer.')
    parser.add_argument('--lamda', type=float, default=0,
                        help='Regularizer for bilinear part.')
    parser.add_argument('--lr', type=float, default=0.05,
                        help='Learning rate.')
    parser.add_argument('--loss_type', nargs='?', default='square_loss',
                        help='Specify a loss type (square_loss or log_loss).')
    parser.add_argument('--optimizer', nargs='?', default='AdagradOptimizer',
                        help='Specify an optimizer type (AdamOptimizer, AdagradOptimizer, GradientDescentOptimizer, MomentumOptimizer).')
    parser.add_argument('--verbose', type=int, default=1,
                        help='Show the results per X epochs (0, 1 ... any positive integer)')
    parser.add_argument('--batch_norm', type=int, default=1,
                    help='Whether to perform batch normalization (0 or 1)')
    parser.add_argument('--activation', nargs='?', default='relu',
                    help='Which activation function to use for deep layers: relu, sigmoid, tanh, identity')
    parser.add_argument('--early_stop', type=int, default=1,
                    help='Whether to perform early stop (0 or 1)')
    return parser.parse_args()
Project: nfm    Author: faychu    | Project source | File source
def evaluate(self, data):  # evaluate the results for an input set
        num_example = len(data['Y'])
        feed_dict = {self.train_features: data['X'], self.train_labels: [[y] for y in data['Y']], self.dropout_keep: self.no_dropout, self.train_phase: False}
        predictions = self.sess.run((self.out), feed_dict=feed_dict)
        y_pred = np.reshape(predictions, (num_example,))
        y_true = np.reshape(data['Y'], (num_example,))
        if self.loss_type == 'square_loss':    
            predictions_bounded = np.maximum(y_pred, np.ones(num_example) * min(y_true))  # bound the lower values
            predictions_bounded = np.minimum(predictions_bounded, np.ones(num_example) * max(y_true))  # bound the higher values
            RMSE = math.sqrt(mean_squared_error(y_true, predictions_bounded))
            return RMSE
        elif self.loss_type == 'log_loss':
            logloss = log_loss(y_true, y_pred) # I haven't checked the log_loss
            return logloss
Project: KAGGLE_CERVICAL_CANCER_2017    Author: ZFTurbo    | Project source | File source
def check_score(subm_file):
    real_answ = "../modified_data/answers_stage1.csv"
    real = pd.read_csv(real_answ)
    pred = pd.read_csv(subm_file)
    real['s'] = 0
    real.loc[real['Type_1'] > 0, 's'] = 0
    real.loc[real['Type_2'] > 0, 's'] = 1
    real.loc[real['Type_3'] > 0, 's'] = 2
    pred = pd.merge(pred, real[['image_name', 's']], on=['image_name'], left_index=True)
    score = log_loss(pred['s'], pred[['Type_1', 'Type_2', 'Type_3']].as_matrix())
    return score
Project: Telstra    Author: minjay    | Project source | File source
def predict(self, clf, X, y, X_test, stage):
        np.random.seed(self.seed)
        n_train = X.shape[0]
        kf = KFold(n_train, n_folds=self.n_fold, shuffle=True)
        best_score = []
        y_pred_sum = np.zeros((X_test.shape[0], self.num_class))
        if stage=='base':
            meta_feat = np.zeros((n_train+X_test.shape[0], self.num_class))
        i = 0
        for train, val in kf:
            i += 1
            print(i)
            X_train, X_val, y_train, y_val = X[train], X[val], y[train], y[val]
            ## CV sets
            # train
            clf.fit(X_train, y_train)
            curr_pred = clf.predict_proba(X_val)
            curr_best_score = log_loss(y_val, curr_pred)
            print(curr_best_score)
            best_score += [curr_best_score]
            # predict
            if stage=='base':
                meta_feat[val, :] = curr_pred
            else:
                y_pred = clf.predict_proba(X_test)
                y_pred_sum = y_pred_sum+y_pred
        print(np.mean(best_score), np.std(best_score))
        ## test set
        if stage=='base':
            # train
            clf.fit(X, y)
            # predict
            meta_feat[n_train:, :] = clf.predict_proba(X_test)
            return meta_feat
        else:
            y_pred = y_pred_sum/self.n_fold
            return y_pred
Project: Kaggle_the_Nature_Conservancy_Fisheries_Monitoring    Author: Sapphirine    | Project source | File source
def print_clf(clf, trainx, testx, trainy, testy):
    start = time.time()
    model = clf.fit(trainx, trainy)
    end = time.time()
    pred = model.predict(testx)
    print "log_loss: ", log_loss(testy, model.predict_proba(testx))
    print confusion_matrix(np.array(testy), pred)
Project: RIDDLE    Author: jisungk    | Project source | File source
def loss_scorer(estimator, x, y):
    loss = log_loss(y, estimator.predict_proba(x))
    assert loss >= 0
    # minimal loss is best
    # however, we try to maximize the score
    # to account for this we take negative loss
    return -loss
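
The negation is the same convention sklearn's own scorers use; below is a hedged sketch of the built-in equivalent (the estimator and parameter grid are illustrative).

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# scoring='neg_log_loss' calls predict_proba and returns -log_loss, so larger is better,
# which is exactly what the hand-rolled loss_scorer above reproduces.
search = GridSearchCV(LogisticRegression(max_iter=1000),
                      param_grid={'C': [0.1, 1.0, 10.0]},
                      scoring='neg_log_loss', cv=3)
# search.fit(X, y) would then select the C with the smallest validation log loss.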
Project: Kaggler    Author: qqgeogor    | Project source | File source
def logloss(y, p):
    """Bounded log loss error.

    Args:
        y (numpy.array): target
        p (numpy.array): prediction

    Returns:
        bounded log loss error
    """

    p[p < 1e-15] = 1e-15
    p[p > 1 - 1e-15] = 1 - 1e-15
    return log_loss(y, p)
Project: LearnGraphDiscovery    Author: eugenium    | Project source | File source
def evalData(z,test_set_y):
    " z- prediction test_set_y is the truth "
    diff=z-test_set_y
    fpr, tpr, thresholds = metrics.roc_curve(test_set_y.ravel(), z.ravel(), pos_label=1)
    auc=metrics.auc(fpr, tpr)
    ap=metrics.average_precision_score(test_set_y.ravel(), z.ravel())

    Q=test_set_y.shape[0]
    Pk10=0
    Pk20=0
    Pk30=0
    Pk50=0
    Pk37=0
    for i in range(Q):
        Pk10+=ranking_precision_score(test_set_y[i], z[i], k=10)
        Pk20+=ranking_precision_score(test_set_y[i], z[i], k=20)
        Pk30+=ranking_precision_score(test_set_y[i], z[i], k=30)
        Pk37+=ranking_precision_score(test_set_y[i], z[i], k=37)
        Pk50+=ranking_precision_score(test_set_y[i], z[i], k=50)
    Pk10=Pk10/Q
    Pk20=Pk20/Q
    Pk30=Pk30/Q
    Pk50=Pk50/Q
    Pk37=Pk37/Q
    cross=metrics.log_loss(test_set_y,z)
    print '\n'
    print 'AUC',auc,'MSE',np.mean((diff)**2),'Cross-entropy:',cross
    print 'Precision at k=10: ',Pk10,' k=20: ',Pk20,' k=30: ',Pk30,' k=50: ',Pk50, ' k=37: ',Pk37
    return Pk37
Project: kaggle_airbnb    Author: svegapons    | Project source | File source
def opt_1_obj_func(w, X, y, n_class):
    """
    Function to be minimized in the EN_OPT_1 ensembler.
    Parameters:
    ----------
    w: ndarray size=(n_preds * n_class)
       Candidate solution to the optimization problem (vector of weights).
    X: ndarray size=(n_samples, n_preds * n_class)
       Solutions to be combined horizontally concatenated.
    y: ndarray size=(n_samples,)
       Class labels
    n_class: int
       Number of classes in the problem, i.e. = 12
    """
    #Constraining the weights for each class to sum to 1.
    #This constraint could be passed to scipy.minimize directly, but applying it here
    #gives the scipy.minimize call more flexibility (e.g. more solvers are allowed).
    w_range = np.arange(len(w))%n_class 
    for i in range(n_class): 
        w[w_range==i] = w[w_range==i] / np.sum(w[w_range==i])

    sol = np.zeros((X.shape[0], n_class))
    for i in range(len(w)):
        sol[:, i % n_class] += X[:, i] * w[i]
    #The quantity to minimize is the log_loss.     
    sc_ll = log_loss(y, sol)
    return sc_ll
Project: qml    Author: quantum13    | Project source | File source
def _features_sel_cv(self, X, Y, splits, model_id, data_id, log, early_stop_cv = None):

        # workaround: reorder the folds so the worst one comes first, which makes early-stop CV effective
        splits_new_order_temp = []
        for train_indexes, test_indexes in splits:
            splits_new_order_temp += [[train_indexes, test_indexes]]

        splits_new_order = [splits_new_order_temp[2], splits_new_order_temp[1], splits_new_order_temp[3], splits_new_order_temp[0], splits_new_order_temp[4]]

        scores = []
        i = 0
        for train_indexes, test_indexes in splits_new_order:
            i += 1
            X_train = X.loc[train_indexes]
            Y_train = Y.loc[train_indexes][QML_RES_COL]
            X_test = X.loc[test_indexes]
            Y_test = Y.loc[test_indexes][QML_RES_COL]
            res = self.qm.qpredict(model_id, data_id, data=(X_train, Y_train, X_test), Y_test=Y_test, force=True,
                                   save_result=False)

            score = log_loss(Y_test, res.astype(np.float64), eps=1e-14)
            log('   {} {}'.format(i, score))
            sys.stdout.flush()
            scores.append(score)

            if early_stop_cv is not None:
                if early_stop_cv(score):
                    scores = [score]
                    break
        total_score = sum(scores) / len(scores)

        return total_score
Project: kaggle_bnp-paribas    Author: ArdalanM    | Project source | File source
def eval_func(ytrue, ypredproba):

    return metrics.log_loss(ytrue, ypredproba)
Project: kaggle_bnp-paribas    Author: ArdalanM    | Project source | File source
def xgb_accuracy(ypred, dtrain):
        ytrue = dtrain.get_label().astype(int)

        ypred = np.where(ypred <= 0., 1e-5 , ypred)
        ypred = np.where(ypred >= 1., 1.-1e-5, ypred)


        return 'logloss', metrics.log_loss(ytrue, ypred)
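
xgb_accuracy follows xgboost's custom-metric protocol: it receives (preds, DMatrix) and returns a (name, value) pair, so it is wired into training through the feval argument. The sketch below is illustrative; params, dtrain and dvalid are assumed to exist.

watchlist = [(dtrain, 'train'), (dvalid, 'valid')]
bst = xgb.train(params, dtrain,
                num_boost_round=500,
                evals=watchlist,
                feval=xgb_accuracy,            # evaluated on every watchlist entry each round
                early_stopping_rounds=50)      # stops on the last metric of the last evals entry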
Project: kaggle_bnp-paribas    Author: ArdalanM    | Project source | File source
def eval_func(ytrue, ypredproba):

    return metrics.log_loss(ytrue, ypredproba)
Project: kaggle_bnp-paribas    Author: ArdalanM    | Project source | File source
def xgb_accuracy(ypred, dtrain):
        ytrue = dtrain.get_label().astype(int)

        ypred = np.where(ypred <= 0., 1e-5 , ypred)
        ypred = np.where(ypred >= 1., 1.-1e-5, ypred)


        return 'logloss', metrics.log_loss(ytrue, ypred)
Project: kaggle-yelp-restaurant-photo-classification    Author: u1234x1234    | Project source | File source
def logloss(self, label, pred_prob):
        return metrics.log_loss(label, pred_prob)
Project: kaggle-yelp-restaurant-photo-classification    Author: u1234x1234    | Project source | File source
def logloss(self, label, pred_prob):
        return metrics.log_loss(label, pred_prob)
Project: audit-log-detection    Author: twosixlabs    | Project source | File source
def on_train_begin(self, model):

        self.validation = {}
        self.validation['epoch'] = [] 
        self.validation['auc'] = []    
        self.validation['time'] = []    
        self.validation['log_loss'] = []    
        self.validation['roc'] = []
Project: kaggle-quora-solution-8th    Author: qqgeogor    | Project source | File source
def make_mf_classification(X ,y, clf, X_test, n_folds=5,seed=1024,nb_epoch=50,max_features=0.75,name='xgb',path=''):
    '''
    Fit meta-features with @clf and get predictions for the test set. Assumes @clf is a classifier.
    '''
    n = X.shape[0]
    print clf
    np.random.seed(seed)
    feature_index = np.arange(X.shape[1])
    for epoch in range(nb_epoch):
        print "Start epoch:",epoch
        mf_tr = np.zeros((X.shape[0],len(np.unique(y))))
        mf_te = np.zeros((X_test.shape[0],len(np.unique(y))))
        skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed).split(X,y)

        np.random.shuffle(feature_index)
        new_index = feature_index[:int(max_features*len(feature_index))]

        for ind_tr, ind_te in skf:
            if ssp.issparse(X):
                X_tr = X[ind_tr].tocsc()[:,new_index]
                X_te = X[ind_te].tocsc()[:,new_index]
            else:
                X_tr = X[ind_tr][:,new_index]
                X_te = X[ind_te][:,new_index]

            y_tr = y[ind_tr]
            y_te = y[ind_te]

            clf.fit(X_tr, y_tr)
            mf_tr[ind_te] += clf.predict_proba(X_te)
            mf_te += clf.predict_proba(X_test[:,new_index])
            score = log_loss(y_te, mf_tr[ind_te])
            print '\tpred[{}] score:{}'.format(epoch, score)
        mf_te/=n_folds
        pd.to_pickle(mf_tr,path+'X_mf_%s_%s_random_r.pkl'%(name,epoch))
        pd.to_pickle(mf_te,path+'X_t_mf_%s_%s_random_r.pkl'%(name,epoch))