Python pandas module: get_dummies() example source code

We extracted the following code examples from open-source Python projects to illustrate how to use pandas.get_dummies().
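As a quick orientation before the excerpts, a minimal sketch of the call itself (all column names here are illustrative): get_dummies turns a Series into one indicator column per distinct value, and on a DataFrame it encodes only the object/categorical columns, controlled by prefix, columns and drop_first.

import pandas as pd

s = pd.Series(['a', 'b', 'a', 'c'])
print(pd.get_dummies(s))          # columns a, b, c; exactly one 1 per row

df = pd.DataFrame({'color': ['red', 'blue', 'red'], 'size': [1, 2, 3]})
print(pd.get_dummies(df, columns=['color'], prefix='color'))
# 'size' passes through unchanged; 'color' becomes color_blue / color_red

print(pd.get_dummies(s, drop_first=True))   # drop one level to avoid collinearity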

Project: ScoreCardModel    Author: data-science-tools
def transform(self, x):
        """
        Parameters:

            x (Sequence): - the sequence of values to bin

        Returns:

            np.array: - numpy array holding the bin label (interval string) for each value

        """
        s = pd.cut(x, bins=self.bins)
        d = pd.get_dummies(s)
        z = d.T.to_dict()
        re = []
        for i, v in z.items():
            for j, u in v.items():
                if u == 1:
                    re.append(str(j))
        return np.array(re)
Project: HousePricePredictionKaggle    Author: Nuwantha
def data_preprocess(train,test):
    outlier_idx = [4,11,13,20,46,66,70,167,178,185,199, 224,261, 309,313,318, 349,412,423,440,454,477,478, 523,540, 581,588,595,654,688, 691, 774, 798, 875, 898,926,970,987,1027,1109, 1169,1182,1239, 1256,1298,1324,1353,1359,1405,1442,1447]
    train.drop(train.index[outlier_idx],inplace=True)
    all_data = pd.concat((train.loc[:,'MSSubClass':'SaleCondition'],
                          test.loc[:,'MSSubClass':'SaleCondition']))

    to_delete = ['Alley','FireplaceQu','PoolQC','Fence','MiscFeature']
    all_data = all_data.drop(to_delete,axis=1)

    train["SalePrice"] = np.log1p(train["SalePrice"])
    #log transform skewed numeric features
    numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna())) #compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    all_data[skewed_feats] = np.log1p(all_data[skewed_feats])
    all_data = pd.get_dummies(all_data)
    all_data = all_data.fillna(all_data.mean())
    X_train = all_data[:train.shape[0]]
    X_test = all_data[train.shape[0]:]
    y = train.SalePrice

    return X_train,X_test,y
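This excerpt concatenates train and test before encoding so both halves come out with identical dummy columns. A minimal sketch of the same alignment without concatenating, assuming illustrative frames train_df and test_df:

X_train = pd.get_dummies(train_df)
# Reindex test onto the training columns: categories unseen in training
# become all-zero columns, and test-only categories are dropped.
X_test = pd.get_dummies(test_df).reindex(columns=X_train.columns, fill_value=0)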
Project: HousePricePredictionKaggle    Author: Nuwantha
def data_preprocess(train, test):
    outlier_idx = [4, 11, 13, 20, 46, 66, 70, 167, 178, 185, 199, 224, 261, 309, 313, 318, 349, 412, 423, 440, 454, 477,
                   478, 523, 540, 581, 588, 595, 654, 688, 691, 774, 798, 875, 898, 926, 970, 987, 1027, 1109, 1169,
                   1182, 1239, 1256, 1298, 1324, 1353, 1359, 1405, 1442, 1447]
    train.drop(train.index[outlier_idx], inplace=True)
    all_data = pd.concat((train.loc[:, 'MSSubClass':'SaleCondition'],
                          test.loc[:, 'MSSubClass':'SaleCondition']))

    to_delete = ['Alley', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature']
    all_data = all_data.drop(to_delete, axis=1)

    train["SalePrice"] = np.log1p(train["SalePrice"])
    # log transform skewed numeric features
    numeric_feats = all_data.dtypes[all_data.dtypes != "object"].index
    skewed_feats = train[numeric_feats].apply(lambda x: skew(x.dropna()))  # compute skewness
    skewed_feats = skewed_feats[skewed_feats > 0.75]
    skewed_feats = skewed_feats.index
    all_data[skewed_feats] = np.log1p(all_data[skewed_feats])
    all_data = pd.get_dummies(all_data)
    all_data = all_data.fillna(method='ffill')
    X_train = all_data[:train.shape[0]]
    X_test = all_data[train.shape[0]:]
    y = train.SalePrice

    return X_train, X_test, y
Project: cloudml-samples    Author: GoogleCloudPlatform
def generator_input(input_file, chunk_size):
  """Generator function to produce features and labels
     needed by keras fit_generator.
  """
  input_reader = pd.read_csv(tf.gfile.Open(input_file[0]),
                           names=CSV_COLUMNS,
                           chunksize=chunk_size,
                           na_values=" ?")

  for input_data in input_reader:
    input_data = input_data.dropna()
    label = pd.get_dummies(input_data.pop(LABEL_COLUMN))

    input_data = to_numeric_features(input_data)
    n_rows = input_data.shape[0]
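    # Note: the return below fires on the first chunk, so the generator cycles that chunk's rows indefinitely.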
    return ( (input_data.iloc[[index % n_rows]], label.iloc[[index % n_rows]]) for index in itertools.count() )
Project: tensorflow    Author: KirovVerst
def next_batch(df, i=None):
    """

    :param df: pandas dataframe
    :param i: batch index
    :return: (numpy array x, numpy array y)
    """
    if i is None:
        start = 0
        end = df.shape[0]
    else:
        start = BATCH_SIZE * i
        end = BATCH_SIZE * (i + 1)
    result = df[start:end]
    if "Survived" in result:
        batch_ys = pd.get_dummies(result.pop('Survived').values).as_matrix()
        batch_xs = result.as_matrix()
        return batch_xs, batch_ys
    else:
        return result.as_matrix()
Project: dask-ml    Author: dask
def transform(self, X, y=None):
        """Dummy encode the categorical columns in X

        Parameters
        ----------
        X : pd.DataFrame or dd.DataFrame
        y : ignored

        Returns
        -------
        transformed : pd.DataFrame or dd.DataFrame
            Same type as the input
        """
        if not X.columns.equals(self.columns_):
            raise ValueError("Columns of 'X' do not match the training "
                             "columns. Got {!r}, expected {!r}".format(
                                 X.columns, self.columns_
                             ))
        if isinstance(X, pd.DataFrame):
            return pd.get_dummies(X, drop_first=self.drop_first)
        elif isinstance(X, dd.DataFrame):
            return dd.get_dummies(X, drop_first=self.drop_first)
        else:
            raise TypeError("Unexpected type {}".format(type(X)))
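A caveat for the dd.get_dummies branch: Dask can only dummy-encode categorical columns whose categories are already known, so object columns must be categorized first. A minimal sketch, with pdf and 'color' as illustrative names:

import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({'color': ['red', 'blue', 'red'], 'size': [1, 2, 3]})
ddf = dd.from_pandas(pdf, npartitions=2)
ddf = ddf.categorize(columns=['color'])      # make the categories known to Dask
dummies = dd.get_dummies(ddf, columns=['color']).compute()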
Project: aboleth    Author: data61
def input_fn(df):
    """Format the downloaded data."""
    # Stack the continuous feature columns into an (examples, features) float matrix.
    continuous_cols = [df[k].values for k in CONTINUOUS_COLUMNS]
    X_con = np.stack(continuous_cols).astype(np.float32).T

    # Standardise
    X_con -= X_con.mean(axis=0)
    X_con /= X_con.std(axis=0)

    # Integer-encode each categorical column via the index of its dummy column.
    categ_cols = [np.where(pd.get_dummies(df[k]).values)[1][:, np.newaxis]
                  for k in CATEGORICAL_COLUMNS]
    n_values = [np.amax(c) + 1 for c in categ_cols]
    X_cat = np.concatenate(categ_cols, axis=1).astype(np.int32)

    # Extract the label column as a column vector.
    label = df[LABEL_COLUMN].values[:, np.newaxis]

    # Returns the feature columns and the label.
    return X_con, X_cat, n_values, label
Project: strategy    Author: kanghua309
def replay(self):
        """Memory Management and training of the agent
        """
        if len(self.memory) < self.batch_size:
            return

        state, action, reward, next_state, done = self._get_batches()
        reward += (self.gamma
                   * np.logical_not(done)
                   * np.amax(self.model.predict(next_state), axis=1))
        q_target = self.target_model.predict(state)

        _ = pd.Series(action)
        one_hot = pd.get_dummies(_).as_matrix()
        action_batch = np.where(one_hot == 1)
        q_target[action_batch] = reward
        return self.model.fit(state, q_target,
                              batch_size=self.batch_size,
                              epochs=1,
                              verbose=False)
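The get_dummies/np.where round-trip above only recovers (row, action) index pairs. Assuming the actions are integer codes in 0..n_actions-1, the same update can be written as a direct fancy-indexed assignment:

import numpy as np

rows = np.arange(len(action))
q_target[rows, action] = reward   # update one Q-value per sampled transition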
Project: next-book    Author: EmmaOnThursday
def make_date_columns_categorical_binary(book_attributes):
    """Turn all date columns in book_attributes into binary categorical columns."""

    # bucket publish dates & insert categorical data columns into data frame
    orig_pub_year_cat = transform_pub_dates(book_attributes['original_pub_year'])
    book_attributes.insert(loc=5, column='orig_pub_year_cat', value=orig_pub_year_cat)

    pub_year_cat = transform_pub_dates(book_attributes['pub_year'])
    book_attributes.insert(loc=5, column='pub_year_cat', value=pub_year_cat)

    # turn date categories into binary dataframes; merge back into book_attributes
    pub_year_dummies = pd.get_dummies(book_attributes['pub_year_cat'])
    orig_year_dummies = pd.get_dummies(book_attributes['orig_pub_year_cat'])

    book_full_attr = book_attributes.merge(pub_year_dummies,left_index=True, right_index=True)
    book_full_attr = book_full_attr.merge(orig_year_dummies,left_index=True, right_index=True)

    return book_full_attr
Project: JData-algorithm-competition    Author: wrzto
def load_user_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00'):
    '''
    Count of each action type per user within the time window.
    '''
    dump_path = './cache/user_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date=start_date, end_date=end_date, field=['user_id', 'time', 'type'])
        prefix = 'Action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df['type'], prefix=prefix)
        df = pd.concat([df, type_dummies], axis=1)
        drop_cols = ['time', 'type']
        df.drop(drop_cols, axis=1, inplace=True)
        df = df.groupby(['user_id'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)
    return df
Project: JData-algorithm-competition    Author: wrzto
def load_base_user_feat(end_date='2016-04-16'):
    '''
    Basic user features.
    '''
    dump_path = './cache/base_user_feat_{0}.pkl'.format(end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = pd.read_csv(USER_FILE, encoding='gbk')
        # sex_dummies = pd.get_dummies(df.sex, prefix='sex')
        df.user_reg_tm.fillna('2016-02-01', inplace=True)
        df.user_reg_tm = pd.to_datetime(df.user_reg_tm).apply(lambda t: pd.to_datetime('2016-02-01') if t > pd.to_datetime('2016-04-15') else t)
        df['reg_tm_dist'] = df.user_reg_tm.apply(lambda t: (pd.to_datetime(end_date) - t).days)
        df = df[['user_id', 'user_lv_cd', 'reg_tm_dist']]
        # df = pd.concat([df, sex_dummies], axis=1)
        # age_dummies = pd.get_dummies(df.age, prefix='age')
        # N = age_dummies.shape[1]
        # age_dummies.columns = ['age_{0}'.format(i) for i in range(N)]
        # df = pd.concat([df, age_dummies], axis=1)
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)
    return df
Project: JData-algorithm-competition    Author: wrzto
def load_UIPair_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', actions= [1,2,3,4,5,6]):
    '''
    Action counts for each user-item (UI) pair.
    '''
    dump_path = './cache/UIPair_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'sku_id', 'cate', 'type'])
        prefix = 'UIPair_action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df['type'], prefix=prefix)
        df = pd.concat([df, type_dummies], axis=1)
        df.drop(['type'], axis=1, inplace=True)
        df = df.groupby(['user_id', 'sku_id', 'cate'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    actions.sort()
    rt_cols = ['user_id', 'sku_id', 'cate']
    rt_cols.extend(['UIPair_action_cnt_{0}_{1}_{2}'.format(start_date[:10], end_date[:10], i) for i in actions])
    df = df[rt_cols]

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_UCPair_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', actions=[1,2,3,4,5,6]):
    '''
    Action counts for each user-cate (UC) pair.
    '''
    dump_path = './cache/UCPair_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'type', 'cate'])
        prefix = 'UCPair_action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df['type'], prefix=prefix)
        df = pd.concat([df, type_dummies], axis=1)
        df = df.groupby(['user_id', 'cate'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    actions.sort()
    rt_cols = ['user_id', 'cate']
    rt_cols.extend(['UCPair_action_cnt_{0}_{1}_{2}'.format(start_date[:10], end_date[:10], i) for i in actions])
    df = df[rt_cols]

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_base_item_feat(end_date = '2016/4/16'):
    '''
    Basic item features from the comment data.
    '''
    JComment = pd.read_csv(COMMENT_FILE, encoding='gbk')
    end_date = pd.to_datetime(end_date)
    JComment.dt = pd.to_datetime(JComment.dt)
    dts = JComment.dt.drop_duplicates()
    dts.sort_index(inplace=True, ascending=False)
    for dt in dts.iteritems():
        if dt[-1] < end_date:
            break
    JComment = JComment[JComment.dt == dt[-1]].drop(['dt'], axis=1)
    Comment_num_dummies = pd.get_dummies(JComment.comment_num, prefix='Comment_num')
    JComment = pd.concat([JComment, Comment_num_dummies], axis=1)

    return JComment.drop(['comment_num'], axis=1)
Project: JData-algorithm-competition    Author: wrzto
def load_item_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', actions=[1,2,3,4,5,6]):
    '''
    Item action counts.
    '''
    dump_path = './cache/item_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['sku_id', 'type'])
        prefix = 'item_action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df['type'], prefix=prefix)
        df = pd.concat([df, type_dummies], axis=1)
        df.drop(['type'], axis=1, inplace=True)
        df = df.groupby(['sku_id'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    rt_cols = ['sku_id']
    rt_cols.extend(['item_action_cnt_{0}_{1}_{2}'.format(start_date[:10], end_date[:10], i) for i in actions])
    df = df[rt_cols]

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_UBPair_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-01 00:00:00', actions = [1,2,3,4,5,6]):
    '''
    Action counts for each user-brand (UB) pair.
    '''
    dump_path = './cache/UBPair_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'brand', 'type'])
        prefix = 'UBPair_action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df.type, prefix=prefix)
        df = pd.concat([df, type_dummies], axis=1)
        df.drop(['type'], axis=1, inplace=True)
        df = df.groupby(['user_id', 'brand'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    rt_cols = ['user_id', 'brand']
    rt_cols.extend(['UBPair_action_cnt_{0}_{1}_{2}'.format(start_date[:10], end_date[:10], i) for i in actions])
    df = df[rt_cols]

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_BCPair_action_cnt(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', actions=[1,2,3,4,5,6]):
    '''
    Action counts for each brand-cate (BC) pair.
    '''
    dump_path = './cache/BCPair_action_cnt_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['brand', 'cate', 'type'])
        prefix = 'BCPair_action_cnt_{0}_{1}'.format(start_date[:10], end_date[:10])
        type_dummies = pd.get_dummies(df.type, prefix=prefix)
        df = pd.concat([df.drop(['type'], axis=1), type_dummies], axis=1)
        df = df.groupby(['brand', 'cate'], as_index=False).sum()
        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    rt_cols = ['brand', 'cate']
    rt_cols.extend(['BCPair_action_cnt_{0}_{1}_{2}'.format(start_date[:10], end_date[:10], i) for i in actions])
    df = df[rt_cols]

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_user_act_cnt_with_timeZone(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00'):
    '''
    Per-user action counts by time zone.
    '''
    dump_path = './cache/user_act_cnt_with_timeZone_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'time_zone'])
        timeZone_dummies = pd.get_dummies(df.time_zone, prefix='time_zone_cnt')
        df = pd.concat([df, timeZone_dummies], axis=1)
        df.drop(['time_zone'], axis=1, inplace=True)
        df = df.groupby(['user_id'], as_index=False).sum()

        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    return df
Project: JData-algorithm-competition    Author: wrzto
def load_UCPair_act_cnt_with_timeZone(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', cate=[8]):
    '''
    Per user-cate action counts by time zone.
    '''
    dump_path = './cache/UCPair_act_cnt_with_timeZone_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'time_zone', 'cate'])
        timeZone_dummies = pd.get_dummies(df.time_zone, prefix='uc_time_zone_cnt')
        df = pd.concat([df, timeZone_dummies], axis=1)
        df.drop(['time_zone'], axis=1, inplace=True)
        df = df.groupby(['user_id', 'cate'], as_index=False).sum()

        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    df = df[df.cate.isin(cate)]
    return df
Project: JData-algorithm-competition    Author: wrzto
def load_UIPair_act_cnt_with_timeZone(start_date = '2016-02-01 00:00:00', end_date = '2016-04-16 00:00:00', cate=[8]):
    '''
    Per user-item action counts by time zone.
    '''
    dump_path = './cache/UIPair_act_cnt_with_timeZone_{0}_{1}.pkl'.format(start_date[:10], end_date[:10])
    if os.path.exists(dump_path):
        with open(dump_path, 'rb') as f:
            df = pickle.load(f)
    else:
        df = get_action_data(start_date = start_date, end_date = end_date, field=['user_id', 'time_zone', 'sku_id'])
        timeZone_dummies = pd.get_dummies(df.time_zone, prefix='time_zone_cnt')
        df = pd.concat([df, timeZone_dummies], axis=1)
        df.drop(['time_zone'], axis=1, inplace=True)
        df = df.groupby(['user_id', 'sku_id'], as_index=False).sum()

        with open(dump_path, 'wb') as f:
            pickle.dump(df, f)

    return df
Project: expected_goals    Author: andrebrener
def get_table(train_table):
    x_cols = []
    for col in train_table.columns:
        # print(data[col].value_counts())
        if col not in ['result', 'team_name', 'competition', 'season_x',
                       'surname']:
            train_table[col] = train_table[col].astype(str)
            x_cols.append(col)

    # print(x_cols)

    X = pd.get_dummies(train_table[x_cols])
    y = train_table['result']

    print(train_table.shape)
    print(X.shape)
    print(y.shape)

    return X, y
Project: Davies_Bouldin_Index_KMeans    Author: akankshadara
def main():
    df = pd.read_csv("dataset.csv")
    df = df.dropna()
    # print df
    x1 = df.copy()
    del x1['Customer']
    del x1['Effective To Date']
    x4 = pd.get_dummies(x1)
    # print x4
    n = 10
    clf = k_means(x4, n_clusters = n)
    centroids = clf[0] 
    # 10 clusters
    labels = clf[1] 
    # print x4[1]
    index_db_val = compute_DB_index(x4, labels, centroids, n)
    print "The value of Davies Bouldin index for a K-Means cluser of size " + str(n) + " is: " + str(index_db_val)
Project: DSI-personal-reference-kit    Author: teb311
def dummify(df):
    '''
        Given a dataframe, create dummy columns for every column that is not
        already numerically typed. Note this does NOT drop one dummy level per
        category, which you would normally want for linear regression.

        returns DataFrame -- a dataframe with all non-numeric columns swapped into dummy columns
    '''
    obj_cols = []
    for cname in df.columns:
        if df[cname].dtype == object:
            obj_cols.append(cname)

    df = pd.get_dummies(df, columns=obj_cols)
    # for cname in obj_cols:
    #     del df[cname]

    return df
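If one dummy level per category should be dropped after all (avoiding the dummy-variable trap in linear models), the same call accepts drop_first; a one-line sketch:

df = pd.get_dummies(df, columns=obj_cols, drop_first=True)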
Project: TextStageProcessor    Author: mhyhre
def apriori_alg(trans, support=0.01, minlen=2):
    print('appr_1')
    dna = trans.unstack().dropna()
    print('appr_2')
    ts = pandas.get_dummies(dna).groupby(level=1).sum()
    print('appr_3')
    n_rows, n_cols = ts.shape  # rows are transactions, columns are items
    pattern = []
    for cnum in range(minlen, n_cols + 1):
        for cols in combinations(ts, cnum):
            print('cnum', cnum)
            patsup = ts[list(cols)].all(axis=1).sum()
            patsup = float(patsup) / n_rows
            pattern.append([",".join(cols), patsup])
    print('appr_4')
    sdf = pandas.DataFrame(pattern, columns=["Pattern", "Support"])
    print('appr_5')
    results = sdf[sdf.Support >= support]
    print('appr_6')
    return results

# Apply the Apriori algorithm to the data
Project: Tencent_Social_Ads    Author: freelzy
def doOneHot(X_train, X_test):
    res = X_test[['instanceID']]
    X_test.drop('instanceID', axis=1, inplace=True)
    data = X_train.append(X_test, ignore_index=True)
    del X_train, X_test
    gc.collect()

    features_trans = ['gender','appCategory_main','connectionType']
    data = pd.get_dummies(data, columns=features_trans)

    X_train = data.loc[data['label'] != -1, :]
    X_test = data.loc[data['label'] == -1, :].copy()
    X_test.loc[:, 'instanceID'] = res.values
    del data
    gc.collect()
    return X_train, X_test
Project: Steal-ML    Author: ftramer
def prepare_gss(onehot=True):
    data = pd.read_csv('../data/GSShappiness.csv')

    del data['year']
    del data['id']

    data = data.dropna()
    target = "Happiness level"

    X = data[list(set(data.columns) - set([target]))]
    y = data[target]

    if onehot:
        X = pd.get_dummies(X)

    return X, y
Project: jsaicup2017    Author: SS1031
def thunder():
    if os.path.exists('../dataset/thunder.pkl'):
        return pd.read_pickle('../dataset/thunder.pkl')

    thunder_df = pd.read_csv('../input/thunder.csv',
                             names=[
                                 'datetime',    # observation time
                                 'lat',         # latitude (decimal degrees)
                                 'lon',         # longitude (decimal degrees)
                                 'type'         # lightning type: CG = cloud-to-ground, IC = intra-cloud
                             ])

    # parse the timestamp strings into datetimes
    thunder_df.datetime = pd.to_datetime(thunder_df.datetime)

    # observation_point_df.to_pickle('../dataset/observation_point.pkl')
    thunder_df = pd.concat([thunder_df, pd.get_dummies(thunder_df.type)], axis=1)
    thunder_df.to_pickle('../dataset/thunder.pkl')

    return thunder_df
Project: Titanic    Author: dataventureutc
def load_data():

    data = pd.read_csv('data/train.csv')

    # drop rows with empty features / gaps in columns
    data = data.dropna()

    # Categorical values into numerical (one hot encoding)
    one_hot_embarked = pd.get_dummies(data['Embarked'], prefix='embarked')
    data = data.join(one_hot_embarked)

    one_hot_pclass = pd.get_dummies(data['Pclass'], prefix='pclass')
    data = data.join(one_hot_pclass)

    # The sex column has only two values (M/F), so a single 0/1 column suffices
    # instead of one-hot encoding with two columns
    data['sex'] = data.apply(lambda x: 1 if (x['Sex'] == 'female') else 0, axis=1)

    # Drop features not used for training the model
    data = data.drop(['Cabin', 'Name', 'PassengerId', 'Pclass', 'Sex', 'Ticket', 'Embarked'], axis=1)

    return data.drop(['Survived'], axis=1), data[['Survived']]
Project: mars_express    Author: wsteitz
def parse_context_dmop(path):
    df = read(path, "dmop")

    # ATTT-A and ATTT-B are different
    attt = df[df['subsystem'].str.startswith("ATTT")].copy()
    attt['subsystem'] = attt['subsystem'].str[:3] + attt['subsystem'].str[-1]

    df = pd.concat([attt, df])

    # take the first 4 chars
    df['subsystem'] = df['subsystem'].str[:4]

    # convert to 1 / 0
    df = pd.get_dummies(df.subsystem)
    df = df.resample("1h").sum().fillna(0.0)

    df['sum_dmop'] = df.sum(axis=1)

    return df
Project: mars_express    Author: wsteitz
def parse_context_ftl(path):
    raw = read(path, "ftl")

    df = raw.copy()
    df['ut_ms'] = pd.to_datetime(raw['utb_ms'], unit='ms')
    df.sort_values("ut_ms", inplace=True)
    # dummies
    df = df.set_index('ut_ms')
    dummies = pd.get_dummies(df.type).join(df['flagcomms'], how="outer")
    dummies = dummies.resample("1h").sum().fillna(0.0)

    df = raw.copy()
    df['event'] = df.type + df.flagcomms.astype("str")
    del df['type'], df['flagcomms']
    df['ute_ms'] = pd.to_datetime(df['ute_ms'], unit='ms')
    df['utb_ms'] = pd.to_datetime(df['utb_ms'], unit='ms')
    durations = [event_to_min_per_hour(df, event) for event in df.event.unique()]
    durations = pd.concat(durations, axis=1).fillna(0)

    return dummies.join(durations, how="outer")
Project: sklearnflask    Author: amirziai
def predict():
    if clf:
        try:
            json_ = request.json
            query = pd.get_dummies(pd.DataFrame(json_))

            # https://github.com/amirziai/sklearnflask/issues/3
            # Thanks to @lorenzori
            query = query.reindex(columns=model_columns, fill_value=0)

            prediction = list(clf.predict(query))

            return jsonify({'prediction': prediction})

        except Exception, e:

            return jsonify({'error': str(e), 'trace': traceback.format_exc()})
    else:
        print 'train first'
        return 'no model here'
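The reindex against model_columns only works if the dummy columns seen at training time were persisted next to the model. A minimal sketch of that step, assuming joblib and a training frame X_train (illustrative names):

import joblib

model_columns = list(X_train.columns)            # dummy columns produced at training time
joblib.dump(model_columns, 'model_columns.pkl')
# at serving time:
model_columns = joblib.load('model_columns.pkl')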
Project: The_Ultimate_Student_Hunt    Author: analyticsvidhya
def preprocess(file,istrian):
    df=pd.read_csv(file,parse_dates=['Date'],dayfirst=True)
    end_missing=['Average_Atmospheric_Pressure','Max_Atmospheric_Pressure',
    'Min_Atmospheric_Pressure','Min_Ambient_Pollution','Max_Ambient_Pollution']
    df=df.fillna(-1)
    if istrian:
        outcome=df.Footfall
        df=df.drop(['Footfall'],axis=1)
    else:
        outcome=np.nan

    df['month']=df['Date'].apply(lambda x: x.month)
    df['date']=df['Date'].apply(lambda x: x.day)
    df['weekday']=df['Date'].apply(lambda x: x.weekday())
    df['sardiya']=df['month'].apply(lambda x: 1 if x in [1,2,11,12,3] else 0)
    df.date=df.date.apply(get_normal_date)
    park_dummies=pd.get_dummies(df.Park_ID,prefix='park')
    location_dummies=pd.get_dummies(df.Location_Type,prefix='location')
    df=pd.concat([df,park_dummies,location_dummies],axis=1)  # attach the one-hot columns
    df['Direction_Of_Wind2']=df.Direction_Of_Wind.apply(get_wind_dir)

    return df,outcome

#load training set
Project: JDcontest    Author: zsyandjyhouse
def get_comment_product_fea(endtime):
    enddt = pd.to_datetime(endtime,format = '%Y-%m-%d')
    if enddt == pd.to_datetime('2016-04-15',format = '%Y-%m-%d'):
        commentdata = pd.read_csv(FilePath + CommentFile)
        commentdata = commentdata[(commentdata["dt"] == "2016-04-15")]
        commentdata = commentdata.sort_values(by="sku_id").reset_index()[["sku_id", "comment_num", "has_bad_comment", "bad_comment_rate"]]
        return commentdata
    else:
        startdt = enddt - pd.Timedelta(days=7)
        commentpath = FilePath + CommentFile
        commentdata_ALL = pd.read_csv(commentpath)  # read the full comment file
        commentdata_ALL.dt = pd.to_datetime(commentdata_ALL.dt, format='%Y-%m-%d')  # parse the dt column into datetimes
        comment = commentdata_ALL[(commentdata_ALL.dt <= enddt) & (commentdata_ALL.dt > startdt)]
        df = pd.get_dummies(comment['comment_num'], prefix='comment_num')
        comment = pd.concat([comment, df], axis=1)
        comment = comment[['sku_id', 'has_bad_comment', 'bad_comment_rate', 'comment_num_1', 'comment_num_2', 'comment_num_3','comment_num_4']]
        sorted_comment = comment.sort_values(by=['sku_id']).reset_index().drop('index',1)
        #sorted_comment.to_csv(FilePath + 'skuFeaInComment_before'+str(enddt), index=False)
        return sorted_comment

# get action features
Project: JDcontest    Author: zsyandjyhouse
def get_action_feat(start_time, end_time,action_data):
    actions=action_data[(action_data['time']>=start_time)&(action_data['time']<=end_time)]
    #actions = get_actions(start_time, end_time)
    #actions = actions[actions['cate'] == 8]
    actions = actions[['user_id', 'sku_id', 'type']]
    df = pd.get_dummies(actions['type'], prefix='%s-%s-action' % (start_time, end_time))
    actions = pd.concat([actions, df], axis=1)  # type: pd.DataFrame
    actions = actions.groupby(['user_id', 'sku_id'], as_index=False).sum()

    actions.fillna(0,inplace=True)
    name='%s-%s-action' % (start_time, end_time)
    actions[name+'_1256']=actions[name+'_1']+actions[name+'_2']+actions[name+'_5']+actions[name+'_6']
    actions[name+'_1256_d_4']=actions[name+'_4']/actions[name+'_1256']

    del actions['type']
    # action_fea_file = 'action_fea_' + STARTdt_str + 'to' + ENDdt_str + '.csv'
    # action_fea.to_csv(FilePath + action_fea_file, index=False)
    return actions

#get basic user features
Project: JDcontest    Author: zsyandjyhouse
def get_basic_user_fea():
    user = pd.read_csv(FilePath+UserFile, encoding='gbk')
    # user['age'] = user['age'].map(convert_age)
    user['age']=user['age'].replace([u'16-25岁',u'26-35岁',u'36-45岁',u'46-55岁',u'56岁以上'],[1,2,3,4,5])
    user=user[((user['age']==1) |
                (user['age']==2) |
                ( user['age']==3) |
                (user['age']==4) |
                (user['age']==5)|
                (user['age']==-1))]
    age_df = pd.get_dummies(user["age"], prefix="age")
    sex_df = pd.get_dummies(user["sex"], prefix="sex")
    user_lv_df = pd.get_dummies(user["user_lv_cd"], prefix="user_lv_cd")
    user = pd.concat([user['user_id'], age_df, sex_df, user_lv_df], axis=1)
    user.to_csv(FilePath + 'user_basic_fea.csv',index=False)
    return user

    #get basic product features
Project: tdlstm    Author: bluemonk482
def next_batch(self):
        df = self.batch_df[self.pointer]
        x = np.array([d[0] for d in df])
        xl = np.array([d[1] for d in df])
        xr = np.array([d[2] for d in df])
        tar = np.array([d[3] for d in df])
        y = np.array([d[-1] for d in df])
        y = pd.get_dummies(y).values.astype(np.int32)
        seq_len = [len(seq) for seq in x]
        seq_len_l = [len(seq) for seq in xl]
        seq_len_r = [len(seq) for seq in xr]
        if self.dynamic_padding:
            x = np.array(self.pad_minibatches(x, 'RIGHT'))
            xl = np.array(self.pad_minibatches(xl, 'RIGHT'))
            xr = np.array(self.pad_minibatches(xr, 'RIGHT'))
        self.pointer += 1
        return x, y, seq_len, xl, seq_len_l, xr, seq_len_r, tar
Project: tdlstm    Author: bluemonk482
def next_batch(self):
        df = self.batch_df[self.pointer]
        x = np.array([d[0] for d in df])
        xl = np.array([d[1] for d in df])
        xr = np.array([d[2] for d in df])
        tar = np.array([d[3] for d in df])
        y = np.array([d[-1] for d in df])
        # y = pd.get_dummies(y).values.astype(np.int32)
        seq_len = [len(seq) for seq in x]
        seq_len_l = [len(seq) for seq in xl]
        seq_len_r = [len(seq) for seq in xr]
        if self.dynamic_padding:
            x = np.array(self.pad_minibatches(x, 'RIGHT'))
            xl = np.array(self.pad_minibatches(xl, 'RIGHT'))
            xr = np.array(self.pad_minibatches(xr, 'RIGHT'))
        self.pointer += 1
        return x, y, seq_len, xl, seq_len_l, xr, seq_len_r, tar
Project: MF_MBS_Default_Risk    Author: bentruitt
def load_data(in_file):
    # read csv file prepared by freddie_data_analysis module
    df = pd.read_csv(in_file)
    # drop unneeded columns
    columns = df.columns.tolist()
    for col in columns:
        if 'Unnamed' in col:
            df.drop(col, axis=1, inplace=True)
    df['published_date'] = pd.to_datetime(df['published_date'], errors='coerce')
    df.drop(['published_date'], axis=1, inplace=True)
    # replace nan values with 0
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)
    # apply get_dummies to particular columns
    df = pd.get_dummies(df, prefix=['state'], columns=['property_state'])
    df = pd.get_dummies(df, prefix=['ss'], columns=['special_servicer'])
    # return prepared dataframe
    return df
Project: face-to-emotion    Author: mhw32
def gen_fer2013_csv(csv_path, reshape_width=48, reshape_height=48):
    data = pd.read_csv(csv_path)
    pixels = data['pixels'].tolist()
    width, height = 48, 48
    faces = []
    for pixel_sequence in pixels:
        face = [int(pixel) for pixel in pixel_sequence.split(' ')]
        face = np.asarray(face).reshape(width, height)
        face = cv2.resize(face.astype('uint8'),
                          (reshape_width, reshape_height))
        faces.append(face.astype('float32'))

    faces = np.asarray(faces)
    faces = np.expand_dims(faces, -1)
    emotions = pd.get_dummies(data['emotion']).as_matrix()

    return faces, emotions
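DataFrame.as_matrix(), used here and in several other excerpts, was deprecated in pandas 0.23 and removed in 1.0; on current pandas the same dense label matrix comes from to_numpy() (or .values). A drop-in sketch:

emotions = pd.get_dummies(data['emotion']).to_numpy()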
Project: real_estate    Author: cooperoelrichs
def make_x(self, df):
        x_spec = self.get_individualised_x_spec()


        X = df[XY.reduce_tuples(
            [a for a, b in x_spec if b != 'linear_by_categorical']
        )].copy()
        cats = XY.reduce_tuples(
            [a for a, b in x_spec if b == 'categorical' or b == 'ordinal']
        )

        X = self.prep_work(X, x_spec)

        X = pd.get_dummies(
            X, prefix=cats, prefix_sep='_', columns=cats,
            drop_first=False, dummy_na=False
        )

        return X
Project: jdata    Author: learn2Pro
def get_comments_product_feat(start_date, end_date):
    dump_path = './cache/comments_accumulate_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        comments = pickle.load(open(dump_path))
    else:
        comments = pd.read_csv(comment_path)
        comment_date_end = end_date
        comment_date_begin = comment_date[0]
        for date in reversed(comment_date):
            if date < comment_date_end:
                comment_date_begin = date
                break
        comments = comments[(comments.dt >= comment_date_begin) & (comments.dt < comment_date_end)]
        df = pd.get_dummies(comments['comment_num'], prefix='comment_num')
        comments = pd.concat([comments, df], axis=1)  # type: pd.DataFrame
        # del comments['dt']
        # del comments['comment_num']
        comments = comments[
            ['sku_id', 'has_bad_comment', 'bad_comment_rate', 'comment_num_1', 'comment_num_2', 'comment_num_3',
             'comment_num_4']]
        pickle.dump(comments, open(dump_path, 'w'))
    return comments
Project: jdata    Author: learn2Pro
def get_accumulate_product_feat(start_date, end_date):
    feature = ['sku_id', 'product_action_1_ratio', 'product_action_2_ratio', 'product_action_3_ratio',
               'product_action_5_ratio', 'product_action_6_ratio']
    dump_path = './cache/product_feat_accumulate_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pickle.load(open(dump_path))
    else:
        actions = get_actions(start_date, end_date)
        df = pd.get_dummies(actions['type'], prefix='action')
        actions = pd.concat([actions['sku_id'], df], axis=1)
        actions = actions.groupby(['sku_id'], as_index=False).sum()
        actions['product_action_1_ratio'] = actions['action_4'] / actions['action_1']
        actions['product_action_2_ratio'] = actions['action_4'] / actions['action_2']
        actions['product_action_3_ratio'] = actions['action_4'] / actions['action_3']
        actions['product_action_5_ratio'] = actions['action_4'] / actions['action_5']
        actions['product_action_6_ratio'] = actions['action_4'] / actions['action_6']
        actions = actions[feature]
        pickle.dump(actions, open(dump_path, 'w'))
    return actions
Project: JData    Author: Xls1994
def get_basic_user_feat():
    dump_path = './cache/basic_user.csv'
    # one-hot coding age,sex,lv-cd
    if os.path.exists(dump_path):
        # user = pickle.load(open(dump_path))
        user = pd.read_csv(dump_path)
    else:
        user = pd.read_csv(user_path, encoding='gbk')
        user['age'] = user['age'].map(convert_age)  # bucket the age ranges into numeric codes

        user['user_reg_tm'] = user['user_reg_tm'].map(convert_reg_date)

        age_df = pd.get_dummies(user["age"], prefix="age")

        sex_df = pd.get_dummies(user["sex"], prefix="sex")
        # user_lv_df = pd.get_dummies(user["user_lv_cd"], prefix="user_lv_cd")
        user = pd.concat([user[['user_id', 'user_reg_tm', 'user_lv_cd']], age_df, sex_df], axis=1)
        # pickle.dump(user, open(dump_path, 'w'))
        user.to_csv(dump_path, index=False, encoding='utf-8')
    print 'finish get basic user info'
    return user
Project: JData    Author: Xls1994
def get_basic_product_feat():
    dump_path = './cache/basic_product.csv'
    # one-hot coding a1,a2,a3
    if os.path.exists(dump_path):
        # product = pickle.load(open(dump_path))
        product = pd.read_csv(dump_path)
    else:
        product = pd.read_csv(product_path)
        attr1_df = pd.get_dummies(product["a1"], prefix="a1")
        attr2_df = pd.get_dummies(product["a2"], prefix="a2")
        attr3_df = pd.get_dummies(product["a3"], prefix="a3")
        cate_df = pd.get_dummies(product['cate'], prefix='cate')
        brand_df = pd.get_dummies(product['brand'], prefix='brand')
        # product = pd.concat([product[['sku_id','brand']], attr1_df, attr2_df, attr3_df,cate_df], axis=1)
        product = pd.concat([product[['sku_id','brand']], attr1_df, attr2_df, attr3_df, brand_df, cate_df], axis=1)
        # pickle.dump(product, open(dump_path, 'w'))
        product.to_csv(dump_path, index=False)
    print 'finish get basic product info'
    return product
Project: JData    Author: Xls1994
def get_action_feat(start_date, end_date):
    '''
    Action type codes:
    1. browse (view product detail page)
    2. add to cart  3. remove from cart  4. place order  5. follow  6. click
    '''
    dump_path = './cache/action_accumulate_%s_%s.csv' % (start_date, end_date)
    if os.path.exists(dump_path):
        # actions = pickle.load(open(dump_path))
        actions = pd.read_csv(dump_path)
    else:
        actions = get_actions(start_date, end_date)
        # actions = pd.read_csv(action_1_path)
        actions = actions[['user_id', 'sku_id', 'type']]
        df = pd.get_dummies(actions['type'], prefix='%s-%s-action' % (start_date, end_date))
        actions = pd.concat([actions, df], axis=1)  # type: pd.DataFrame
        # sum the per-type action counts for each user-item pair
        actions = actions.groupby(['user_id', 'sku_id'], as_index=False).sum()
        del actions['type']
        # pickle.dump(actions, open(dump_path, 'w'))
        actions.to_csv(dump_path, index=False)
    print 'finish get action feat'
    return actions
Project: JData    Author: Xls1994
def get_accumulate_brand_feat(start_date, end_date):
    feature = ['brand', 'brand_action_1_ratio', 'brand_action_2_ratio', 'brand_action_3_ratio',
               'brand_action_5_ratio', 'brand_action_6_ratio', 'brand_action_num']
    dump_path = './cache/brand_feat_accumulate_%s_%s.csv' %(start_date,end_date)
    if os.path.exists(dump_path):
        actions = pd.read_csv(dump_path)
    else:
        actions = get_actions(start_date,end_date)
        df = pd.get_dummies(actions['type'],prefix='action')
        actions = pd.concat([actions['brand'],df],axis=1)
        actions = actions.groupby(['brand'],as_index = False).sum()
        actions['brand_action_1_ratio'] = actions['action_4']/actions['action_1']
        actions['brand_action_2_ratio'] = actions['action_4']/actions['action_2']
        actions['brand_action_3_ratio'] = actions['action_4']/actions['action_3']

        actions['brand_action_5_ratio'] = actions['action_4']/actions['action_5']
        actions['brand_action_6_ratio'] = actions['action_4']/actions['action_6']
        actions['brand_action_num'] = actions['action_1'] + actions['action_2'] + actions['action_3'] + actions[
            'action_4'] + actions['action_5'] + actions['action_6']
        actions = actions[feature]
        actions = actions.replace(np.inf, 9999)
        actions.to_csv(dump_path)
    return actions
Project: Benchmarks    Author: ECP-CANDLE
def load_data(shuffle=True, n_cols=None):
    train_path = get_p1_file('http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B2/P1B2.train.csv')
    test_path = get_p1_file('http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B2/P1B2.test.csv')

    usecols = list(range(n_cols)) if n_cols else None

    df_train = pd.read_csv(train_path, engine='c', usecols=usecols)
    df_test = pd.read_csv(test_path, engine='c', usecols=usecols)

    if shuffle:
        df_train = df_train.sample(frac=1, random_state=seed)
        df_test = df_test.sample(frac=1, random_state=seed)

    X_train = df_train.iloc[:, 2:].as_matrix()
    X_test = df_test.iloc[:, 2:].as_matrix()

    y_train = pd.get_dummies(df_train[['cancer_type']]).as_matrix()
    y_test = pd.get_dummies(df_test[['cancer_type']]).as_matrix()

    return (X_train, y_train), (X_test, y_test)
Project: mlbootcamp_5    Author: ivan-filonov
def build(self):
        train, _, test, _ = data.get()
        cset = []
        ntrain = len(train)
        df = pd.concat([train, test], axis=0)
        to_drop = df.columns
        for sc in ['height', 'weight', 'ap_hi', 'ap_lo']:
            tc = df[sc].apply(str)
            maxc = tc.apply(len).max()
            for n in range(maxc):
                df['ft_l_'+sc+'_'+str(n)] = tc.apply(lambda s:ord(s[n])  if n < len(s) else -1)
                df['ft_r_'+sc+'_'+str(n)] = tc.apply(lambda s:ord(s[-n]) if n < len(s) else -1)
                cset.append('ft_l_'+sc+'_'+str(n))
                cset.append('ft_r_'+sc+'_'+str(n))

        df = pd.get_dummies(df, columns=cset).drop(to_drop, axis=1)
        self.train_= df[:ntrain]
        self.test_ = df[ntrain:]
        return self.train_, self.test_, None
Project: mlbootcamp_5    Author: ivan-filonov
def build(self):
        train, y, test, _ = data.get()

        ntrain = len(train)
        df = pd.concat([train, test], axis=0)
        to_drop = df.columns

        dcn = []
        for n in [2, 5, 10, 15, 25]:
            cname = 'kmeans_' + str(n)
            dcn.append(cname)
            df[cname] = cluster.KMeans(n_clusters=n).fit_predict(df)

        df = pd.get_dummies(df, columns=dcn)

        df = df.drop(to_drop, axis=1)
        train = df[:ntrain]
        test = df[ntrain:].copy()

        return train.astype('int32'), test.astype('int32'), None
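Both mlbootcamp_5 excerpts pass columns= explicitly because get_dummies skips numeric columns by default; only object/categorical dtypes are auto-encoded. A minimal sketch of the difference, with illustrative data:

import pandas as pd

clusters = pd.DataFrame({'kmeans_2': [0, 1, 1, 0]})
print(pd.get_dummies(clusters))                        # unchanged: int columns are not auto-encoded
print(pd.get_dummies(clusters, columns=['kmeans_2']))  # kmeans_2_0, kmeans_2_1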