The following 7 code examples, extracted from open-source Python projects, illustrate how to use util.load_data(). Note that load_data is project-specific: its signature and return type differ from example to example below.
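As a point of reference, here is a minimal sketch of the simplest variant seen below (the one-argument util.load_data(path) used in Examples 1 and 5), assuming the dataset is simply pickled to disk; each project's real loader may read CSV, HDF5, or a custom format instead:

import pickle

def load_data(path):
    # Hypothetical sketch only: deserialize one pickled object per file.
    with open(path, 'rb') as f:
        return pickle.load(f)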
Example 1

def ge_cmd_learn():
    args = parse_arg_learn()
    # prepare input to GE_learn
    data = GE_data()
    data.dat = util.load_data(args.data)
    data.labeled_features = util.load_labeled_features(args.labeled_features)
    init_model = GE_model()
    param = GE_param()
    if args.l2:
        param.l2_regularization = args.l2
    final_model_path = args.model
    # print data
    final_model = GE_learn(data, init_model, param)
    util.save_model(final_model, final_model_path)
    return
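parse_arg_learn is not shown in this listing. A hypothetical sketch of what it might look like, inferred only from the attributes Example 1 reads (args.data, args.labeled_features, args.l2, args.model); the project's real flag names and defaults may differ:

import argparse

def parse_arg_learn():
    # Hypothetical reconstruction; the flag spellings are assumptions.
    parser = argparse.ArgumentParser(description='GE learn')
    parser.add_argument('--data', required=True, help='path to training data')
    parser.add_argument('--labeled-features', dest='labeled_features', required=True)
    parser.add_argument('--l2', type=float, default=None, help='L2 regularization strength')
    parser.add_argument('--model', required=True, help='where to save the final model')
    return parser.parse_args()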
Example 2

def describe(name):
    """ Describe the dataset """
    df = load_data(name)
    s = df.groupby(level=[0, 1]).size()
    print('Dataset :', name)
    print('Users :', len(s.groupby(level=0)))
    print('Sessions/user :', s.groupby(level=0).size().mean())
    print('Sample size :', s.mean(), '+/-', s.std())
    print('Mean pp interval (ms) :',
          df.groupby(level=[0, 1]).apply(lambda x: x['timepress'].diff().dropna().mean()).mean())
    print('Mean duration (ms) :',
          df.groupby(level=[0, 1]).apply(lambda x: (x['timerelease'] - x['timepress']).mean()).mean())
    for target in TARGETS[1:]:
        s = df.reset_index().groupby([target, 'session']).size().groupby(level=0).size()
        print(target)
        print(s / s.sum())
    return
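For example, with one of the dataset names used elsewhere in the project (the name below is taken from Example 7):

describe('long_fixed')  # prints user/session counts and keystroke timing statistics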
Example 3

def obfuscate_keystrokes(name, strategy, param):
    """ Mask the keystroke timings of a dataset using the given strategy """
    df = load_data(name)
    df = df.groupby(level=[0, 1]).apply(keystrokes2events).reset_index(level=[2, 3], drop=True)
    if strategy == 'delay':
        df = df.groupby(level=[0, 1]).apply(lambda x: delay_mix(x, param))
    elif strategy == 'interval':
        df = df.groupby(level=[0, 1]).apply(lambda x: interval_mix(x, param))
    else:
        raise Exception('Unknown masking strategy')
    df = df.groupby(level=[0, 1]).apply(events2keystrokes).reset_index(level=[2, 3], drop=True)
    save_data(df, name, masking=(strategy, param))
    return
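delay_mix and interval_mix are masking functions defined elsewhere in the project. Purely as a hypothetical illustration of a delay-based mask (the 'time' column name and the uniform-delay rule are assumptions, not the project's actual logic):

import numpy as np

def delay_mix(events, max_delay):
    # Hypothetical: events is a pandas DataFrame of keystroke events with a
    # 'time' column; push each event later by a random delay in [0, max_delay)
    # milliseconds, then re-sort so the stream stays chronological.
    out = events.copy()
    out['time'] = out['time'] + np.random.uniform(0, max_delay, size=len(out))
    return out.sort_values('time')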
Example 4

from datetime import datetime
# get_char_set, get_maxnb_char, get_label_set, build_shallow, load_data,
# train, and test are helpers defined elsewhere in the project.

def main():
    # img_width, img_height = 48, 48
    img_width, img_height = 200, 60
    img_channels = 1
    # batch_size = 1024
    batch_size = 32
    nb_epoch = 1000
    post_correction = False
    # the model is saved under a directory named by the current date
    save_dir = 'save_model/' + str(datetime.now()).split('.')[0].split()[0] + '/'
    train_data_dir = 'train_data/ip_train/'
    # train_data_dir = 'train_data/single_1000000/'
    val_data_dir = 'train_data/ip_val/'
    test_data_dir = 'test_data//'
    weights_file_path = 'save_model/2016-10-27/weights.11-1.58.hdf5'

    char_set, char2idx = get_char_set(train_data_dir)
    nb_classes = len(char_set)
    max_nb_char = get_maxnb_char(train_data_dir)
    label_set = get_label_set(train_data_dir)
    # print('char_set:', char_set)
    print('nb_classes:', nb_classes)
    print('max_nb_char:', max_nb_char)
    print('size_label_set:', len(label_set))

    # build the CNN architecture
    model = build_shallow(img_channels, img_width, img_height, max_nb_char, nb_classes)
    # model.load_weights(weights_file_path)  # load a previously trained model

    val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # val_data = None
    train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    train(model, batch_size, nb_epoch, save_dir, train_data, val_data, char_set)

    # train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # test(model, train_data, char_set, label_set, post_correction)
    # val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # test(model, val_data, char_set, label_set, post_correction)
    # test_data = load_data(test_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # test(model, test_data, char_set, label_set, post_correction)
Example 5

# parse arguments, get data and model, output prediction
def ge_cmd_predict():
    args = parse_arg_predict()
    # prepare input to GE_predict
    data = util.load_data(args.data)
    model = util.load_model(args.model)
    pred_path = args.output
    pred = GE_predict(data, model)
    util.write_prediction(pred, pred_path)
    return
Example 6

import numpy as np

def load_data(x_path, y_path, shuffle=True):
    xs, ys = load_dataset_csv(x_path, y_path)
    n = len(xs)
    if shuffle:
        # permute both arrays together so features stay aligned with labels
        # (assumes load_dataset_csv returns NumPy arrays)
        shuffle_indices = np.random.permutation(n)
        xs, ys = xs[shuffle_indices], ys[shuffle_indices]
    return xs, ys, n
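A quick self-check of the example above, with load_dataset_csv stubbed out by an inline stand-in (an assumption purely for illustration):

import numpy as np

def load_dataset_csv(x_path, y_path):
    # Stand-in for the real CSV loader, for illustration only.
    return np.arange(10).reshape(5, 2), np.arange(5)

xs, ys, n = load_data('x.csv', 'y.csv', shuffle=True)
assert n == 5
# each feature row stays aligned with its label after shuffling
assert all(xs[i, 0] == 2 * ys[i] for i in range(n))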
Example 7

import pandas as pd
# load_data, make_sessions, remove_repeated_keys, and reduce_dataset are
# project helpers; COLS lists the output columns.

def preprocess_villani(in_file, out_file, long_fixed_out_file):
    """ Preprocess the raw Villani dataset and extend the long fixed dataset """
    df = pd.read_csv(in_file, index_col=[0, 1])

    # Make age a binary target, <30 and >=30
    df['age'] = df['agegroup'].map({
        'under20': '<30',
        '20-29': '<30',
        '30-39': '>=30',
        '40-49': '>=30',
        '50-59': '>=30',
        'over60': '>=30'})

    # Ignore missing data
    df = df.dropna()
    df = remove_repeated_keys(df)

    # combine the Villani fixed text with the citefa dataset fixed text
    long_fixed = load_data('long_fixed')
    slf = long_fixed.groupby(level=[0, 1]).size()
    villani_fixed = df[df['inputtype'] == 'fixed']
    villani_fixed = villani_fixed.groupby(level=[0, 1]).apply(lambda x: make_sessions(x, slf.mean(), slf.std()))
    villani_fixed = villani_fixed.reset_index(level=[0, 1], drop=True)
    villani_fixed = reduce_dataset(villani_fixed, min_samples=10, max_samples=10)
    long_fixed = pd.concat([long_fixed, villani_fixed])
    long_fixed = long_fixed[COLS]
    long_fixed.to_csv(long_fixed_out_file)

    # Free-text input only
    villani_free = df[df['inputtype'] == 'free']
    villani_free = villani_free.groupby(level=[0, 1]).apply(lambda x: make_sessions(x, slf.mean(), slf.std()))
    villani_free = villani_free.reset_index(level=[0, 1], drop=True)
    villani_free = reduce_dataset(villani_free, min_samples=10, max_samples=10)
    villani_free = villani_free[COLS]
    villani_free.to_csv(out_file)
    return
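make_sessions is another project helper not shown in this listing. Judging only from the call make_sessions(x, slf.mean(), slf.std()), a hypothetical sketch of its behavior (the normal-length heuristic and the appended 'session' index level are assumptions):

import numpy as np

def make_sessions(df, mean_len, std_len):
    # Hypothetical: split one user's keystrokes into consecutive sessions
    # whose lengths roughly follow N(mean_len, std_len).
    sizes, remaining = [], len(df)
    while remaining > 0:
        k = min(remaining, max(1, int(np.random.normal(mean_len, std_len))))
        sizes.append(k)
        remaining -= k
    out = df.copy()
    out['session'] = np.repeat(np.arange(len(sizes)), sizes)
    return out.set_index('session', append=True)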