Python sklearn.feature_selection module: RFE code examples

We extracted the following 10 code examples from open-source Python projects to illustrate how to use sklearn.feature_selection.RFE.
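
For orientation, here is a minimal, self-contained usage sketch (the estimator and feature count are chosen only for illustration):

from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.svm import LinearSVC

X, y = load_iris(return_X_y=True)

# keep the 2 best features, dropping 1 feature per elimination round
selector = RFE(estimator=LinearSVC(), n_features_to_select=2, step=1)
selector.fit(X, y)

print(selector.support_)   # boolean mask over the original columns
print(selector.ranking_)   # 1 == selected; larger values were eliminated earlier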

Project: datasciences    Author: BenChehade    | project source | file source
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFE

def greedy_elim(df):

    # feature selection via recursive feature elimination (RFE)
    X = df[[x for x in df.columns if x != 'SalePrice']]
    y = df['SalePrice']
    #model = RandomForestRegressor(n_estimators=50)
    model = GradientBoostingRegressor(n_estimators=50, learning_rate=0.05)
    # 150 features seems to work best at the moment; why is unclear.
    feat_selector = RFE(estimator=model, step=1, n_features_to_select=150)

    # fit the selector (DataFrame.as_matrix() was removed from pandas;
    # use to_numpy() instead)
    feat_selector.fit(X.to_numpy(), y.to_numpy())

    # names of the selected features
    features_bool = np.array(feat_selector.support_)
    features = np.array(X.columns)
    result = features[features_bool]

    # ranking of the selected features (rank 1 == selected)
    features_rank = feat_selector.ranking_
    rank = features_rank[features_bool]

    return result
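
The hard-coded 150 features above (a choice the author notes is unexplained) could instead be picked by cross-validation with sklearn's RFECV variant. A hedged sketch on synthetic data; the regressor and data shapes are illustrative:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_selection import RFECV

X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
                       random_state=0)

# RFECV runs the same recursive elimination but keeps the feature count
# that scores best under cross-validation
selector = RFECV(GradientBoostingRegressor(n_estimators=50), step=1, cv=3)
selector.fit(X, y)
print(selector.n_features_)  # the CV-chosen number of features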
Project: ML-note    Author: JasonK93    | project source | file source
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

def test_compare_with_no_feature_selection():
    '''
    Compare test scores before and after feature selection.
    :return: None
    '''
    iris = load_iris()
    X, y = iris.data, iris.target
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    # note: the selector is fit on the full dataset before the split
    X_t = selector.fit_transform(X, y)
    # sklearn.cross_validation was removed; train_test_split now lives in
    # sklearn.model_selection
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0, stratify=y)
    X_train_t, X_test_t, y_train_t, y_test_t = train_test_split(
        X_t, y, test_size=0.25, random_state=0, stratify=y)
    clf = LinearSVC()
    clf_t = LinearSVC()
    clf.fit(X_train, y_train)
    clf_t.fit(X_train_t, y_train_t)
    print("Original DataSet: test score=%s" % (clf.score(X_test, y_test)))
    print("Selected DataSet: test score=%s" % (clf_t.score(X_test_t, y_test_t)))
Project: tcsl    Author: machinelearningnanodegree    | project source | file source
def recurvise_index(self, clf):
        # rank all features, i.e. continue the elimination until only one remains
        rfe = RFE(clf, n_features_to_select=1)
        rfe.fit(self.features, self.labels)
        # map each feature name to its recursive-elimination rank
        rfedict = {k: v for k, v in
                   zip(self.features.columns.tolist(),
                       map(lambda x: round(x, 4), rfe.ranking_))}
        return rfedict
Project: Default-Credit-Card-Prediction    Author: AlexPnt    | project source | file source
from sklearn.feature_selection import RFE

def rfe_selection(X, y, n_features):
    """
    Performs Recursive Feature Elimination and selects the top-ranked features

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    n_features -- number of best-ranked features to keep
    """

    if verbose:  # module-level flag defined elsewhere in the project
        print('\nPerforming Feature Selection based on the Recursive Feature Elimination method ...')

    # RandomForestClassifierWithCoef is a project-defined wrapper exposing a
    # coef_ attribute so that RFE can rank features
    clf = RandomForestClassifierWithCoef(n_estimators=10, n_jobs=-1)
    fs = RFE(clf, n_features_to_select=n_features, step=1)
    fs = fs.fit(X, y)
    ranks = fs.ranking_

    # collect the indexes of the features ranked 1 (i.e. selected)
    feature_indexes = []
    for i in range(len(ranks)):  # xrange is Python 2 only
        if ranks[i] == 1:
            feature_indexes += [i]

    # return the selected features and their original column indexes
    return X[:, feature_indexes[:n_features]], feature_indexes[:n_features]
Project: SecuML    Author: ANSSI-FR    | project source | file source
def __init__(self, conf):
        SemiSupervisedFeatureSelection.__init__(self, conf)
        self.projection = RFE(estimator=conf.model,
                              n_features_to_select=conf.num_components,
                              step=conf.step)
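
SecuML stores the fitted RFE as a generic projection object. Since RFE implements the usual fit/transform interface, it also composes directly with a Pipeline; a minimal sketch, with the estimator and feature count chosen only for illustration:

from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

X, y = load_iris(return_X_y=True)

# RFE acts as a transformer, so it can feed a downstream estimator
pipe = Pipeline([
    ('rfe', RFE(estimator=LogisticRegression(max_iter=1000),
                n_features_to_select=2)),
    ('clf', LogisticRegression(max_iter=1000)),
])
pipe.fit(X, y)
print(pipe.score(X, y))  # accuracy on the training data, for illustration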
Project: toho_mir_ml    Author: kodack64    | project source | file source
import pickle

import numpy as np
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.utils import shuffle

import ml_feature_name  # project-local module

def featureRank(useFeature, trueSet, falseSet):

    # load the positive and negative examples from pickled feature files
    X_true = []
    for dn in trueSet:
        with open("./learn/data/" + useFeature + "_" + dn + ".pkl", "rb") as fin:
            X_true.append(pickle.load(fin))
    X_true = np.vstack(X_true)
    print(X_true.shape)

    X_false = []
    for dn in falseSet:
        with open("./learn/data/" + useFeature + "_" + dn + ".pkl", "rb") as fin:
            X_false.append(pickle.load(fin))
    X_false = np.vstack(X_false)
    print(X_false.shape)

    # split both classes, keeping the class sizes balanced
    test_size = 0.3
    X_true_train, X_true_test = train_test_split(X_true, test_size=test_size)
    X_false_train, X_false_test = train_test_split(
        X_false, train_size=len(X_true_train), test_size=len(X_true_test))

    X = np.vstack([X_true_train, X_false_train])
    X_ = np.vstack([X_true_test, X_false_test])
    Y = [1] * len(X_true_train) + [0] * len(X_false_train)
    Y_ = [1] * len(X_true_test) + [0] * len(X_false_test)
    X, Y = shuffle(X, Y)
    X_, Y_ = shuffle(X_, Y_)

    featNames = ml_feature_name.getFeatureName(useFeature)

    # rank every feature by eliminating down to a single one
    clf = LinearSVC(C=0.1)
    rfe = RFE(estimator=clf, n_features_to_select=1, step=1)
    rfe.fit(X, Y)
    ranks = rfe.ranking_
    if useFeature == "rp":
        with open("./learn/feature/rp_feature_rank.txt", "w") as fout:
            for i, r in enumerate(ranks):
                fout.write("{0} {1}\n".format(i, r))

    rankFeat = list(zip(ranks, featNames))
    rankFeat.sort()
    for rf in rankFeat:
        if useFeature in ["tfidf_1gram", "tfidf_2gram", "tfidf_3gram", "tfidf_4gram"]:
            if ml_feature_name.isDiatonic(rf[1]):
                print(rf)
        else:
            print(rf)
Project: ML-note    Author: JasonK93    | project source | file source
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.svm import LinearSVC

def test_RFE():
    '''
    Test RFE, reducing the iris data to 2 features.
    :return: None
    '''
    iris = load_iris()
    X = iris.data
    y = iris.target
    estimator = LinearSVC()
    selector = RFE(estimator=estimator, n_features_to_select=2)
    selector.fit(X, y)
    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)
Project: python_utils    Author: Jayhello    | project source | file source
from sklearn import datasets
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

def sk_feature_ref():
    # load the iris dataset
    dataset = datasets.load_iris()
    # create a base classifier used to evaluate a subset of attributes
    model_lr = LogisticRegression()
    # create the RFE model and select 3 attributes
    # (n_features_to_select is keyword-only in recent sklearn)
    rfe = RFE(model_lr, n_features_to_select=3)
    rfe = rfe.fit(dataset.data, dataset.target)
    # summarize the selection of the attributes
    print(rfe.support_)
    # [False  True  True  True]
    print(rfe.ranking_)
    # [2 1 1 1]
    print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), dataset.feature_names)))
    # [(1.0, 'petal length (cm)'), (1.0, 'petal width (cm)'), (1.0, 'sepal width (cm)'), (2.0, 'sepal length (cm)')]
Project: python_utils    Author: Jayhello    | project source | file source
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

def sk_feature_ref_v2():
    # get_dummy_data() is a project-local helper
    X, Y = get_dummy_data()
    names = ['f1', 'f2', 'f3']

    model_lr = LogisticRegression()

    rfe = RFE(model_lr, n_features_to_select=2)
    rfe = rfe.fit(X, Y)

    print(rfe.support_)
    print(rfe.ranking_)
    print(sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names)))
Project: easyML    Author: aarshayj    | project source | file source
def recursive_feature_elimination(self, nfeat=None, step=1, inplace=False):

        """A method to implement recursive feature elimination on the model.
        Note that CV is not performed in this function. The method keeps
        eliminating features (the number set by the step parameter) at each
        iteration until the specified number of features is reached.

        Parameters
        ----------
        nfeat : int or None, default=None
            The number of top features to select. If None, half of the
            features are selected.

        step : int or float, default=1
            If int, then step corresponds to the number of features to remove
            at each iteration.
            If float and within (0.0, 1.0), then step corresponds to the
            percentage (rounded down) of features to remove at each
            iteration.
            If float and greater than one, the integer part is used.

        inplace : bool, default=False
            If True, the predictors of the class are modified to those
            selected by the RFE procedure.

        Returns
        -------
        selected : pandas.Series
            The selected features as index and their selection rank as
            values.
        """
        rfe = RFE(self.alg, n_features_to_select=nfeat, step=step)

        rfe.fit(
                self.datablock.train[self.predictors],
                self.datablock.train[self.datablock.target]
                )

        ranks = pd.Series(rfe.ranking_, index=self.predictors)

        selected = ranks.loc[rfe.support_]

        if inplace:
            self.set_predictors(selected.index.tolist())

        return selected
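
The step semantics documented above match sklearn's own RFE parameter: an int removes that many features per iteration, while a float in (0.0, 1.0) removes that fraction (rounded down). A minimal standalone sketch on the iris data, with the estimator and feature count chosen only for illustration:

from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# step=0.5 removes 50% of the features (rounded down) per iteration,
# so the 4 iris features are cut to the requested 2 in a single pass
rfe = RFE(LogisticRegression(max_iter=1000), n_features_to_select=2, step=0.5)
rfe.fit(X, y)
print(rfe.support_)   # boolean mask of the 2 surviving features
print(rfe.ranking_)   # rank 1 == selected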