Python reader 模块,Reader() 实例源码

我们从 Python 开源项目中提取了以下 9 个代码示例,用于说明如何使用 reader.Reader()。

项目:ReLiefParser    作者:XuezheMax    | 项目源码 | 文件源码
def read_data(source_path, word_alphabet, pos_alphabet, type_alphabet, max_size=None, normalize_digits=True):
    """Read instances from ``source_path`` and group them into length buckets.

    Instances are pulled one at a time from a ``Reader`` until it returns
    ``None`` or ``max_size`` instances have been consumed.  Each instance is
    stored in the first bucket of the module-level ``_buckets`` whose size is
    at least ``inst.length()``; an instance longer than every bucket is still
    counted but is not stored.

    Args:
        source_path: forwarded to ``Reader``.
        word_alphabet: forwarded to ``Reader``.
        pos_alphabet: forwarded to ``Reader``.
        type_alphabet: forwarded to ``Reader``.
        max_size: maximum number of instances to consume; a falsy value
            (``None`` or ``0``) means "no limit".
        normalize_digits: forwarded to ``reader.getNext``.

    Returns:
        A list with one entry per bucket; each entry is a list of
        ``[word_ids, pos_ids, heads, type_ids]`` records.
    """
    logger = utils.get_logger("Reading Data")
    data = [[] for _ in _buckets]

    counter = 0
    reader = Reader(source_path, word_alphabet, pos_alphabet, type_alphabet)
    # Make sure the reader is closed even if reading or bucketing raises
    # part-way through the input.
    try:
        inst = reader.getNext(normalize_digits)
        while inst is not None and (not max_size or counter < max_size):
            counter += 1
            if counter % 10000 == 0:
                logger.info("reading data: %d" % counter)

            inst_size = inst.length()
            for bucket_id, bucket_size in enumerate(_buckets):
                if inst_size <= bucket_size:
                    data[bucket_id].append([inst.word_ids, inst.pos_ids, inst.heads, inst.type_ids])
                    break

            inst = reader.getNext(normalize_digits)
    finally:
        reader.close()
    logger.info("Total number of data: %d" % counter)
    return data
项目:pyrepl    作者:dajose    | 项目源码 | 文件源码
def readline(self, n=None):
    """Read one line through a ``Reader`` built on ``self.con``.

    ``n`` is accepted but not used.  An ``EOFError`` raised while creating
    the reader or reading the line is turned into an empty string.
    """
    from reader import Reader
    try:
        # NOTE: this isn't quite right -- it will clobber any prompt that has
        # already been printed.  Not sure how to get around this...
        line = Reader(self.con).readline()
    except EOFError:
        line = ''
    return line
项目:librobinson    作者:byztxt    | 项目源码 | 文件源码
def read_robinson(directory, suffix, read_what):
    """Create a ``reader.Reader`` for ``directory``/``suffix`` and run ``read_NT``.

    ``read_what`` is first passed through ``readwhat.normalize``; a progress
    line describing the normalized selection is written to stderr before
    reading starts.  The reader object is returned after ``read_NT``.
    """
    selection = readwhat.normalize(read_what)
    selection_label = readwhat.read_what2string(selection)

    progress_line = "Now reading '%s' from directory %s with suffix %s\n" % (selection_label, directory, suffix)
    sys.stderr.write(progress_line)

    nt_reader = reader.Reader(directory, suffix)
    nt_reader.read_NT(selection)
    return nt_reader
项目:CycleGAN-TensorFlow    作者:vanhuyz    | 项目源码 | 文件源码
def model(self):
    """Build the losses and summaries for both translation directions.

    Two ``Reader`` objects feed batches for the X and Y domains; generator,
    discriminator and cycle-consistency losses are built from them and
    histogram / scalar / image summaries are registered.

    Returns:
        ``(G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x)``.
    """
    # Both readers share the same image and batch configuration.
    reader_opts = dict(image_size=self.image_size, batch_size=self.batch_size)
    reader_x = Reader(self.X_train_file, name='X', **reader_opts)
    reader_y = Reader(self.Y_train_file, name='Y', **reader_opts)

    real_x = reader_x.feed()
    real_y = reader_y.feed()

    cycle_loss = self.cycle_consistency_loss(self.G, self.F, real_x, real_y)

    # X -> Y
    fake_y = self.G(real_x)
    G_gan_loss = self.generator_loss(self.D_Y, fake_y, use_lsgan=self.use_lsgan)
    G_loss = G_gan_loss + cycle_loss
    # NOTE(review): the discriminator loss reads ``self.fake_y`` (instance
    # attribute), not the local ``fake_y`` -- presumably an externally fed
    # tensor; confirm against the class definition.
    D_Y_loss = self.discriminator_loss(self.D_Y, real_y, self.fake_y, use_lsgan=self.use_lsgan)

    # Y -> X
    fake_x = self.F(real_y)
    F_gan_loss = self.generator_loss(self.D_X, fake_x, use_lsgan=self.use_lsgan)
    F_loss = F_gan_loss + cycle_loss
    # NOTE(review): same as above, ``self.fake_x`` rather than the local.
    D_X_loss = self.discriminator_loss(self.D_X, real_x, self.fake_x, use_lsgan=self.use_lsgan)

    # Summaries: discriminator outputs on real and generated inputs.
    summary = tf.summary
    summary.histogram('D_Y/true', self.D_Y(real_y))
    summary.histogram('D_Y/fake', self.D_Y(self.G(real_x)))
    summary.histogram('D_X/true', self.D_X(real_x))
    summary.histogram('D_X/fake', self.D_X(self.F(real_y)))

    # Summaries: scalar losses (GAN terms are logged without the cycle term).
    summary.scalar('loss/G', G_gan_loss)
    summary.scalar('loss/D_Y', D_Y_loss)
    summary.scalar('loss/F', F_gan_loss)
    summary.scalar('loss/D_X', D_X_loss)
    summary.scalar('loss/cycle', cycle_loss)

    # Summaries: generated and reconstructed images.
    summary.image('X/generated', utils.batch_convert2int(self.G(real_x)))
    summary.image('X/reconstruction', utils.batch_convert2int(self.F(self.G(real_x))))
    summary.image('Y/generated', utils.batch_convert2int(self.F(real_y)))
    summary.image('Y/reconstruction', utils.batch_convert2int(self.G(self.F(real_y))))

    return G_loss, D_Y_loss, F_loss, D_X_loss, fake_y, fake_x
项目:pefile.pypy    作者:cloudtracer    | 项目源码 | 文件源码
def readline(self, n=None):
    """Return one line read by a ``Reader`` wrapping ``self.con``.

    The ``n`` argument is ignored.  If ``EOFError`` is raised while building
    the reader or reading, an empty string is returned instead.
    """
    from reader import Reader
    try:
        # NOTE: this isn't quite right -- it will clobber any prompt that has
        # already been printed.  Not sure how to get around this...
        result = Reader(self.con).readline()
    except EOFError:
        result = ''
    return result
项目:monogreedy    作者:jinjunqi    | 项目源码 | 文件源码
def eva_a_phi(phi):
    """Train and evaluate the region-attention model for one size setting.

    Args:
        phi: a 4-item sequence ``(na, nnh, nh, nw)`` where
            ``na`` is the size of the region features after dimensionality
            reduction, ``nnh`` the hidden size of the attention MLP,
            ``nh`` the LSTM hidden size and ``nw`` the word-embedding size.

    Returns:
        ``(scores, running_time)`` as loaded from ``scores.npy`` and
        ``running_time.npy``, which ``valid_time.py`` is expected to write.
    """
    # Local import, in keeping with the other function-scope imports here.
    import subprocess

    na, nnh, nh, nw = phi

    # choose a dataset to train (mscoco, flickr8k, flickr30k)
    dataset = 'mscoco'
    data_dir = osp.join(DATA_ROOT, dataset)

    from model.ra import Model
    # settings
    mb = 64  # mini-batch size
    lr = 0.0002  # learning rate
    name = 'ra'  # model name, just setting it to 'ra' is ok. 'ra'='region attention'
    vocab_freq = 'freq5'  # vocabulary with words occurring fewer than 5 times filtered out

    print('... loading data {}'.format(dataset))
    train_set = Reader(batch_size=mb, data_split='train', vocab_freq=vocab_freq, stage='train',
                       data_dir=data_dir, feature_file='features_30res.h5', topic_switch='off') # change 0, 1000, 82783
    valid_set = Reader(batch_size=1, data_split='val', vocab_freq=vocab_freq, stage='val',
                       data_dir=data_dir, feature_file='features_30res.h5',
                       caption_switch='off', topic_switch='off') # change 0, 10, 5000

    npatch, nimg = train_set.features.shape[1:]
    nout = len(train_set.vocab)
    save_dir = '{}-nnh{}-nh{}-nw{}-na{}-mb{}-V{}'.\
        format(dataset.lower(), nnh, nh, nw, na, mb, nout)
    save_dir = osp.join(SAVE_ROOT, save_dir)

    model_file, m = find_last_snapshot(save_dir, resume_training=False)
    # Keep a copy of the model definition next to the snapshots.  An argument
    # list (no shell) is used so a save_dir containing spaces or shell
    # metacharacters is passed through intact; as before, a failing copy is
    # not treated as fatal.
    subprocess.call(['cp', 'model/ra.py', '{}/'.format(save_dir)])
    logger = Logger(save_dir)
    logger.info('... building')
    model = Model(name=name, nimg=nimg, nnh=nnh, nh=nh, na=na, nw=nw, nout=nout, npatch=npatch, model_file=model_file)

    # start training
    bs = BeamSearch([model], beam_size=1, num_cadidates=100, max_length=20)
    best = train(model, bs, train_set, valid_set, save_dir, lr,
                 display=100, starting=m, endding=20, validation=2000, life=10, logger=logger) # change dis1,100; va 2,2000; life 0,10;
    average_models(best=best, L=6, model_dir=save_dir, model_name=name+'.h5') # L 1, 6

    # evaluation: the two directories are handed to valid_time.py through
    # .npy files in the current working directory.
    np.save('data_dir', data_dir)
    np.save('save_dir', save_dir)

    subprocess.call(['python', 'valid_time.py'])

    scores = np.load('scores.npy')
    running_time = np.load('running_time.npy')
    # Single pre-formatted argument so the call prints the same text whether
    # ``print`` is a statement or a function.
    print('cider: {} B1-4,C: {} running time: {}'.format(scores[-1], scores, running_time))

    return scores, running_time
项目:crnn_tf    作者:liuhu-bigeye    | 项目源码 | 文件源码
def main():
    """Build the data readers and the model, then run CTC training.

    Exactly two command-line arguments are required; both are forwarded to
    ``Config``.  Training runs only when ``config.items['phase']`` is
    ``'ctc'``; any other phase just initialises the session (and restores a
    model if ``config.items`` has a ``'model'`` entry) and returns.

    Raises:
        AssertionError: if the number of command-line arguments is not two.
    """
    if len(sys.argv) != 3:
        # Raised explicitly rather than via ``assert False`` so the check is
        # not stripped under ``python -O`` (which would otherwise surface
        # later as a NameError on ``config``).
        raise AssertionError(
            'expected exactly 2 command-line arguments, got %d' % (len(sys.argv) - 1))
    config = Config(sys.argv[1], sys.argv[2])

    phase = config.items['phase']
    from reader import Reader
    train_set = Reader(phase='train', batch_size=config.items['batch_size'], do_shuffle=True)
    valid_set = Reader(phase='val', batch_size=10, do_shuffle=False)
    test_set = Reader(phase='test', batch_size=10, do_shuffle=False)

    glog.info('generating model...')
    from model import Model

    model = Model(config.items['lr'])

    # NOTE: resuming the epoch counter from the model file name is disabled
    # here; training always starts counting from 0.
    config.items['starting'] = 0

    # snapshot path
    mkdir_safe(config.items['snap_path'])

    sess_config = tf.ConfigProto(allow_soft_placement=True, device_count = {'GPU': 4})
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        tf.global_variables_initializer().run()
        if 'model' in config.items.keys():
            # Log first so a failing restore still shows which checkpoint
            # was being loaded.
            glog.info('loading model: %s...' % config.items['model'])
            model.saver.restore(sess, config.items['model'])
        if phase == 'ctc':
            glog.info('ctc training...')
            train_valid(sess, model, train_set, valid_set, test_set, config)
        # NOTE: no other phase is handled in this entry point.

    glog.info('end')
项目:crnn_tf    作者:liuhu-bigeye    | 项目源码 | 文件源码
def main():
    """Build the model and readers, then either predict or train.

    Exactly two command-line arguments are required; both are forwarded to
    ``Config``.  If ``config.items`` has a ``'predict'`` entry, probabilities
    are predicted for the train, dev and test readers and the function
    returns; otherwise the snapshot directories are created and
    ``train_valid`` is run.

    Raises:
        AssertionError: if the number of command-line arguments is not two.
    """
    if len(sys.argv) != 3:
        # Raised explicitly rather than via ``assert False`` so the check is
        # not stripped under ``python -O``.
        raise AssertionError(
            'expected exactly 2 command-line arguments, got %d' % (len(sys.argv) - 1))
    config = Config(sys.argv[1], sys.argv[2])
    from utils import mkdir_safe, log_self
    log_self(__file__)

    glog.info('generating model...')
    from model_after import Model
    model = Model(learning_rate=config.items['lr'], config=config)

    # load model
    if 'model' in config.items.keys():
        glog.info('loading model: %s...' % config.items['model'])
        model.load_model(config.items['model'])
    elif 'model_old' in config.items.keys():
        glog.info('loading model from old: %s...' % config.items['model_old'])
        model.load_old_model(config.items['model_old'])

    from reader import Reader
    train_set = Reader(phase='train', config=config, do_shuffle=True, resample=True)
    # dev/test readers reuse the feature statistics taken from the train reader.
    valid_set = Reader(phase='dev', config=config, do_shuffle=True, resample=False, feature_mean=train_set.feature_mean, feature_std=train_set.feature_std)
    test_set = Reader(phase='test', config=config, do_shuffle=True, resample=False, feature_mean=train_set.feature_mean, feature_std=train_set.feature_std)

    # The starting epoch is the integer after the last '_' of the model path;
    # fall back to 0 when there is no model entry or no numeric suffix.  Only
    # these expected failures are caught, so KeyboardInterrupt / SystemExit
    # are no longer swallowed.
    try:
        config.items['starting'] = int(config.items['model'].split('_')[-1])
    except (KeyError, ValueError, AttributeError):
        config.items['starting'] = 0

    if 'predict' in config.items.keys():
        prob_predict(model, train_set, config, epoch=config.items['starting'])
        prob_predict(model, valid_set, config, epoch=config.items['starting'])
        prob_predict(model, test_set, config, epoch=config.items['starting'])
        return

    # snapshot path
    mkdir_safe(config.items['snap_path'])
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_dev'))
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_test'))

    glog.info('training...')
    train_valid(model, train_set, valid_set, test_set, config)

    glog.info('end')
项目:crnn_tf    作者:liuhu-bigeye    | 项目源码 | 文件源码
def main():
    """Build the model on CPU and dispatch on ``config.items['phase']``.

    Exactly two command-line arguments are required; both are forwarded to
    ``Config``.  Handled phases:

    * ``'ctc'`` -- run ``train_valid`` (validation/test readers are ``None``).
    * ``'extract_feature'`` -- no-op.
    * ``'get_prediction'`` -- run ``get_prediction`` on an unshuffled train reader.
    * ``'top_k_prediction'`` -- run ``get_top_k_prediction`` on an unshuffled
      test reader.

    Raises:
        AssertionError: if the number of command-line arguments is not two.
    """
    if len(sys.argv) != 3:
        # Raised explicitly rather than via ``assert False`` so the check is
        # not stripped under ``python -O``.
        raise AssertionError(
            'expected exactly 2 command-line arguments, got %d' % (len(sys.argv) - 1))
    config = Config(sys.argv[1], sys.argv[2])

    phase = config.items['phase']

    glog.info('generating model...')
    from model_me import Model

    with tf.device('/cpu:0'):
        model = Model()

    # NOTE: resuming the epoch counter from the model file name is disabled
    # here; training always starts counting from 0.
    config.items['starting'] = 0

    # snapshot path
    mkdir_safe(config.items['snap_path'])
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_dev'))
    mkdir_safe(os.path.join(config.items['snap_path'], 'output_test'))

    # GPU use is switched off for this session.
    sess_config = tf.ConfigProto(device_count = {'GPU': 0})
    from reader import Reader

    train_set = Reader(phase='train', batch_size=config.items['batch_size'], do_shuffle=True, resample=True, distortion=True)
    # Validation and test readers are currently disabled.
    valid_set = None
    test_set = None

    with tf.Session(config=sess_config) as sess:
        tf.global_variables_initializer().run()
        if 'model' in config.items.keys():
            # The leftover ``pdb.set_trace()`` that used to follow this call
            # was removed: it stopped every run that loaded a model.
            model.assign_from_pkl(config.items['model'])

            glog.info('loading model: %s...' % config.items['model'])
        if phase == 'ctc':
            glog.info('ctc training...')
            train_valid(sess, model, train_set, valid_set, test_set, config)
        elif phase == 'extract_feature':
            pass
        elif phase == 'get_prediction':
            # Replace the training reader with a deterministic one.
            train_set = Reader(phase='train', batch_size=config.items['batch_size'], do_shuffle=False, resample=False, distortion=False)
            glog.info('feature extracting...')
            get_prediction(model, train_set, config)
        elif phase == 'top_k_prediction':
            train_set = Reader(phase='test', batch_size=config.items['batch_size'], do_shuffle=False, resample=False, distortion=False)
            glog.info('feature extracting...')
            get_top_k_prediction(model, train_set, config)

    glog.info('end')