Python data_utils module: prepare_custom_data() example source code

We extracted the following 2 code examples from open-source Python projects to illustrate how to use data_utils.prepare_custom_data().
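The examples below show only the call site. For orientation, here is a minimal, self-contained sketch of what such a helper typically does in TF1-era seq2seq tutorials: build capped vocabularies and rewrite the training files as token IDs. Every name and detail in this sketch is an assumption for illustration, not the actual data_utils implementation from the Seq2Seq-Chatbot project.

import collections
import os

_START_VOCAB = ["_PAD", "_GO", "_EOS", "_UNK"]
UNK_ID = 3

def _build_vocab(data_path, vocab_path, max_size):
  # Count whitespace tokens and keep the most frequent ones, capped at max_size.
  counter = collections.Counter()
  with open(data_path) as f:
    for line in f:
      counter.update(line.split())
  words = (_START_VOCAB + [w for w, _ in counter.most_common()])[:max_size]
  with open(vocab_path, "w") as f:
    f.write("\n".join(words) + "\n")
  return {w: i for i, w in enumerate(words)}

def _to_token_ids(data_path, ids_path, vocab):
  # Rewrite each line of text as space-separated token IDs.
  with open(data_path) as fin, open(ids_path, "w") as fout:
    for line in fin:
      fout.write(" ".join(str(vocab.get(w, UNK_ID)) for w in line.split()) + "\n")

def prepare_custom_data(working_dir, train_enc, train_dec,
                        enc_vocab_size, dec_vocab_size):
  enc_vocab_path = os.path.join(working_dir, "vocab%d.enc" % enc_vocab_size)
  dec_vocab_path = os.path.join(working_dir, "vocab%d.dec" % dec_vocab_size)
  enc_vocab = _build_vocab(train_enc, enc_vocab_path, enc_vocab_size)
  dec_vocab = _build_vocab(train_dec, dec_vocab_path, dec_vocab_size)
  enc_ids_path = train_enc + (".ids%d" % enc_vocab_size)
  dec_ids_path = train_dec + (".ids%d" % dec_vocab_size)
  _to_token_ids(train_enc, enc_ids_path, enc_vocab)
  _to_token_ids(train_dec, dec_ids_path, dec_vocab)
  # The callers below unpack four values and use only the first two.
  return enc_ids_path, dec_ids_path, enc_vocab_path, dec_vocab_path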

Project: Seq2Seq-Chatbot    Author: FR0ST1N
import math
import os
import sys
import time

import numpy as np
import tensorflow as tf

import data_utils

def train():
  # Prepare the dataset; paths and vocab sizes come from module-level config.
  print("Starting to train from " + working_directory)
  enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
      working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size)

  # Cap per-process GPU memory at roughly two thirds and use the BFC allocator.
  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
  config = tf.ConfigProto(gpu_options=gpu_options)
  config.gpu_options.allocator_type = 'BFC'

  with tf.Session(config=config) as sess:
    print("Creating model with %d layers and %d cells." % (num_layers, layer_size))
    model = create_model(sess, False)
    train_set = read_data(enc_train, dec_train, max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # Cumulative fraction of training examples up to and including each bucket,
    # used below to sample buckets in proportion to their size.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in range(len(train_bucket_sizes))]

    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    while True:
      # Draw a bucket at random, weighted by the number of examples it holds.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in range(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      # Run one training step on a batch drawn from the chosen bucket.
      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / steps_per_checkpoint
      loss += step_loss / steps_per_checkpoint
      current_step += 1

      if current_step % steps_per_checkpoint == 0:
        # Report progress and save a checkpoint every steps_per_checkpoint steps.
        # perplexity = math.exp(loss) if loss < 300 else float('inf')
        print("Saved model at step %d with step time %.2f"
              % (model.global_step.eval(), step_time))
        # Decay the learning rate if the loss has stopped improving.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        checkpoint_path = os.path.join(working_directory, "seq2seq.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        sys.stdout.flush()
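The bucket selection in the loop above samples buckets in proportion to their size: train_buckets_scale is a cumulative distribution over buckets, and the first entry that exceeds a uniform draw determines the bucket. A stripped-down, runnable sketch of just that logic (the function name and example sizes are ours, not the project's):

import numpy as np

def sample_bucket(bucket_sizes):
  # Cumulative fraction of examples up to and including each bucket.
  total = float(sum(bucket_sizes))
  scale = [sum(bucket_sizes[:i + 1]) / total for i in range(len(bucket_sizes))]
  # The first cumulative value above a uniform draw picks the bucket.
  r = np.random.random_sample()
  return min(i for i in range(len(scale)) if scale[i] > r)

# With sizes [100, 300, 600], bucket 2 is chosen about 60% of the time.
print(sample_bucket([100, 300, 600]))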
Project: Seq2Seq-Chatbot    Author: FR0ST1N
def train():
  # Prepare the dataset; paths and vocab sizes come from module-level config.
  print("Starting to train from " + working_directory)
  enc_train, dec_train, _, _ = data_utils.prepare_custom_data(
      working_directory, train_enc, train_dec, enc_vocab_size, dec_vocab_size)

  gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.666)
  config = tf.ConfigProto(gpu_options=gpu_options)
  config.gpu_options.allocator_type = 'BFC'

  with tf.Session(config=config) as sess:
    print("Creating model with %d layers and %d cells." % (num_layers, layer_size))
    model = create_model(sess, False)
    train_set = read_data(enc_train, dec_train, max_train_data_size)
    train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))]
    train_total_size = float(sum(train_bucket_sizes))

    # Cumulative fraction of training examples up to and including each bucket,
    # used below to sample buckets in proportion to their size.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in range(len(train_bucket_sizes))]

    step_time, loss = 0.0, 0.0
    current_step = 0
    previous_losses = []
    # This variant also logs its own running step counter.
    count = 0
    while True:
      count += 1
      print('Step: ' + str(count))
      # Draw a bucket at random, weighted by the number of examples it holds.
      random_number_01 = np.random.random_sample()
      bucket_id = min([i for i in range(len(train_buckets_scale))
                       if train_buckets_scale[i] > random_number_01])

      start_time = time.time()
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          train_set, bucket_id)
      _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                   target_weights, bucket_id, False)
      step_time += (time.time() - start_time) / steps_per_checkpoint
      loss += step_loss / steps_per_checkpoint
      current_step += 1

      if current_step % steps_per_checkpoint == 0:
        # loss is the mean per-step cross-entropy over the checkpoint window,
        # so its exponential is the training perplexity; the < 300 guard keeps
        # math.exp from overflowing.
        perplexity = math.exp(loss) if loss < 300 else float('inf')
        print("Saved model at step %d with perplexity %.2f"
              % (model.global_step.eval(), perplexity))
        # Decay the learning rate if the loss has stopped improving.
        if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
          sess.run(model.learning_rate_decay_op)
        previous_losses.append(loss)
        checkpoint_path = os.path.join(working_directory, "seq2seq.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)
        step_time, loss = 0.0, 0.0
        sys.stdout.flush()
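The only substantive differences from the first example are the running step counter and the perplexity report. A tiny standalone check of the overflow guard (the helper name and sample values are ours, for illustration only):

import math

def safe_perplexity(mean_loss):
  # math.exp overflows a double near 710, so large losses report as infinity.
  return math.exp(mean_loss) if mean_loss < 300 else float('inf')

print(safe_perplexity(4.6))    # ~99.5
print(safe_perplexity(350.0))  # inf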