Python tensorflow 模块,learn() 实例源码

我们从Python开源项目中,提取了以下21个代码示例,用于说明如何使用tensorflow.learn()

项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_dask_iris_classification(self):
    if HAS_DASK and HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      data = dd.from_pandas(data, npartitions=2)
      labels = pd.DataFrame(iris.target)
      labels = dd.from_pandas(labels, npartitions=2)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      predictions = data.map_partitions(classifier.predict).compute()
      score = accuracy_score(labels.compute(), predictions)
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_dask_iris_classification(self):
    if HAS_DASK and HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      import dask.dataframe as dd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      data = dd.from_pandas(data, npartitions=2)
      labels = pd.DataFrame(iris.target)
      labels = dd.from_pandas(labels, npartitions=2)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      predictions = data.map_partitions(classifier.predict).compute()
      score = accuracy_score(labels.compute(), predictions)
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
项目:seq2seq    作者:google    | 项目源码 | 文件源码
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)
项目:DualEncoder    作者:nachoaguadoc    | 项目源码 | 文件源码
def get_feature_columns(mode):
  feature_columns = []

  feature_columns.append(tf.contrib.layers.real_valued_column(
    column_name="context", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="context_len", dimension=1, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance", dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
  feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="utterance_len", dimension=1, dtype=tf.int64))

  if mode == tf.contrib.learn.ModeKeys.TRAIN:
    # During training we have a label feature
    feature_columns.append(tf.contrib.layers.real_valued_column(
      column_name="label", dimension=1, dtype=tf.int64))

  if mode == tf.contrib.learn.ModeKeys.EVAL:
    # During evaluation we have distractors
    for i in range(9):
      feature_columns.append(tf.contrib.layers.real_valued_column(
        column_name="distractor_{}".format(i), dimension=TEXT_FEATURE_SIZE, dtype=tf.int64))
      feature_columns.append(tf.contrib.layers.real_valued_column(
        column_name="distractor_{}_len".format(i), dimension=1, dtype=tf.int64))

  return set(feature_columns)
项目:DualEncoder    作者:nachoaguadoc    | 项目源码 | 文件源码
def create_input_fn(mode, input_files, batch_size, num_epochs):
  def input_fn():
    features = tf.contrib.layers.create_feature_spec_for_parsing(
        get_feature_columns(mode))

    feature_map = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        features=features,
        reader=tf.TFRecordReader,
        randomize_input=True,
        num_epochs=num_epochs,
        queue_capacity=200000 + batch_size * 10,
        name="read_batch_features_{}".format(mode))

    # This is an ugly hack because of a current bug in tf.learn
    # During evaluation TF tries to restore the epoch variable which isn't defined during training
    # So we define the variable manually here
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      tf.get_variable(
        "read_batch_features_eval/file_name_queue/limit_epochs/epochs",
        initializer=tf.constant(0, dtype=tf.int64))

    if mode == tf.contrib.learn.ModeKeys.TRAIN:
      target = feature_map.pop("label")
    else:
      # In evaluation we have 10 classes (utterances).
      # The first one (index 0) is always the correct one
      target = tf.zeros([batch_size, 1], dtype=tf.int64)
    return feature_map, target
  return input_fn
项目:conv_seq2seq    作者:tobyyouup    | 项目源码 | 文件源码
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_pandas_dataframe(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      labels = pd.DataFrame(iris.target)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      score = accuracy_score(labels[0], classifier.predict(data))
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
    else:
      print("No pandas installed. pandas-related tests are skipped.")
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_pandas_series(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      labels = pd.Series(iris.target)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      score = accuracy_score(labels, classifier.predict(data))
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_string_data_formats(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_pandas_dataframe(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      random.seed(42)
      iris = datasets.load_iris()
      data = pd.DataFrame(iris.data)
      labels = pd.DataFrame(iris.target)
      classifier = learn.LinearClassifier(
          feature_columns=learn.infer_real_valued_columns_from_input(data),
          n_classes=3)
      classifier.fit(data, labels, steps=100)
      score = accuracy_score(labels[0], list(classifier.predict(data)))
      self.assertGreater(score, 0.5, "Failed with score = {0}".format(score))
    else:
      print("No pandas installed. pandas-related tests are skipped.")
项目:lsdc    作者:febert    | 项目源码 | 文件源码
def test_string_data_formats(self):
    if HAS_PANDAS:
      import pandas as pd  # pylint: disable=g-import-not-at-top
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_data(pd.DataFrame({"Test": ["A", "B"]}))
      with self.assertRaises(ValueError):
        learn.io.extract_pandas_labels(pd.DataFrame({"Test": ["A", "B"]}))
项目:automatic-summarization    作者:mozilla    | 项目源码 | 文件源码
def make_input_pipeline_from_def(def_dict, mode, **kwargs):
  """Creates an InputPipeline object from a dictionary definition.

  Args:
    def_dict: A dictionary defining the input pipeline.
      It must have "class" and "params" that correspond to the class
      name and constructor parameters of an InputPipeline, respectively.
    mode: A value in tf.contrib.learn.ModeKeys

  Returns:
    A new InputPipeline object
  """
  if not "class" in def_dict:
    raise ValueError("Input Pipeline definition must have a class property.")

  class_ = def_dict["class"]
  if not hasattr(sys.modules[__name__], class_):
    raise ValueError("Invalid Input Pipeline class: {}".format(class_))

  pipeline_class = getattr(sys.modules[__name__], class_)

  # Constructor arguments
  params = {}
  if "params" in def_dict:
    params.update(def_dict["params"])
  params.update(kwargs)

  return pipeline_class(params=params, mode=mode)
项目:seq2seq    作者:google    | 项目源码 | 文件源码
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn
项目:conv_seq2seq    作者:tobyyouup    | 项目源码 | 文件源码
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn
项目:automatic-summarization    作者:mozilla    | 项目源码 | 文件源码
def create_input_fn(pipeline,
                    batch_size,
                    bucket_boundaries=None,
                    allow_smaller_final_batch=False,
                    scope=None):
  """Creates an input function that can be used with tf.learn estimators.
    Note that you must pass "factory funcitons" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

  Args:
    pipeline: An instance of `seq2seq.data.InputPipeline`.
    batch_size: Create batches of this size. A queue to hold a
      reasonable number of batches in memory is created.
    bucket_boundaries: int list, increasing non-negative numbers.
      If None, no bucket is performed.

  Returns:
    An input function that returns `(feature_batch, labels_batch)`
    tuples when called.
  """

  def input_fn():
    """Creates features and labels.
    """

    with tf.variable_scope(scope or "input_fn"):
      data_provider = pipeline.make_data_provider()
      features_and_labels = pipeline.read_from_data_provider(data_provider)

      if bucket_boundaries:
        _, batch = tf.contrib.training.bucket_by_sequence_length(
            input_length=features_and_labels["source_len"],
            bucket_boundaries=bucket_boundaries,
            tensors=features_and_labels,
            batch_size=batch_size,
            keep_input=features_and_labels["source_len"] >= 1,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="bucket_queue")
      else:
        batch = tf.train.batch(
            tensors=features_and_labels,
            enqueue_many=False,
            batch_size=batch_size,
            dynamic_pad=True,
            capacity=5000 + 16 * batch_size,
            allow_smaller_final_batch=allow_smaller_final_batch,
            name="batch_queue")

      # Separate features and labels
      features_batch = {k: batch[k] for k in pipeline.feature_keys}
      if set(batch.keys()).intersection(pipeline.label_keys):
        labels_batch = {k: batch[k] for k in pipeline.label_keys}
      else:
        labels_batch = None

      return features_batch, labels_batch

  return input_fn
项目:pydatalab    作者:googledatalab    | 项目源码 | 文件源码
def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)

  train_dir = os.path.join(output_dir, 'train')
  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator
项目:pydatalab    作者:googledatalab    | 项目源码 | 文件源码
def get_estimator(args, output_dir, features, stats, target_vocab_size):
  # Check layers used for dnn models.
  if is_dnn_model(args.model) and not args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* must be used with DNN models')
  if is_linear_model(args.model) and args.hidden_layer_sizes:
    raise ValueError('--hidden-layer-size* cannot be used with linear models')

  # Build tf.learn features
  feature_columns = build_feature_columns(features, stats, args.model)

  # Set how often to run checkpointing in terms of steps.
  config = tf.contrib.learn.RunConfig(
      save_checkpoints_steps=args.min_eval_frequency)

  train_dir = os.path.join(output_dir, 'train')
  if args.model == 'dnn_regression':
    estimator = tf.contrib.learn.DNNRegressor(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_regression':
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=feature_columns,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  elif args.model == 'dnn_classification':
    estimator = tf.contrib.learn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=args.hidden_layer_sizes,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.AdamOptimizer(
            args.learning_rate, epsilon=args.epsilon))
  elif args.model == 'linear_classification':
    estimator = tf.contrib.learn.LinearClassifier(
        feature_columns=feature_columns,
        n_classes=target_vocab_size,
        config=config,
        model_dir=train_dir,
        optimizer=tf.train.FtrlOptimizer(
            args.learning_rate,
            l1_regularization_strength=args.l1_regularization,
            l2_regularization_strength=args.l2_regularization))
  else:
    raise ValueError('bad --model-type value')

  return estimator
项目:pydatalab    作者:googledatalab    | 项目源码 | 文件源码
def read_examples(input_files, batch_size, shuffle, num_epochs=None):
  """Creates readers and queues for reading example protos."""
  files = []
  for e in input_files:
    for path in e.split(','):
      files.extend(file_io.get_matching_files(path))
  thread_count = multiprocessing.cpu_count()

  # The minimum number of instances in a queue from which examples are drawn
  # randomly. The larger this number, the more randomness at the expense of
  # higher memory requirements.
  min_after_dequeue = 1000

  # When batching data, the queue's capacity will be larger than the batch_size
  # by some factor. The recommended formula is (num_threads + a small safety
  # margin). For now, we use a single thread for reading, so this can be small.
  queue_size_multiplier = thread_count + 3

  # Convert num_epochs == 0 -> num_epochs is None, if necessary
  num_epochs = num_epochs or None

  # Build a queue of the filenames to be read.
  filename_queue = tf.train.string_input_producer(files, num_epochs, shuffle)

  example_id, encoded_example = tf.TextLineReader().read_up_to(
      filename_queue, batch_size)

  if shuffle:
    capacity = min_after_dequeue + queue_size_multiplier * batch_size
    return tf.train.shuffle_batch(
        [example_id, encoded_example],
        batch_size,
        capacity,
        min_after_dequeue,
        enqueue_many=True,
        num_threads=thread_count)

  else:
    capacity = queue_size_multiplier * batch_size
    return tf.train.batch(
        [example_id, encoded_example],
        batch_size,
        capacity=capacity,
        enqueue_many=True,
        num_threads=thread_count)


# ==============================================================================
# Building the TF learn estimators
# ==============================================================================
项目:cloudml-examples    作者:googlegenomics    | 项目源码 | 文件源码
def _build_input_fn(input_file_pattern, batch_size, mode):
  """Build input function.

  Args:
    input_file_pattern: The file patter for examples
    batch_size: Batch size
    mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

  Returns:
    Tuple, dictionary of feature column name to tensor and labels.
  """
  def _input_fn():
    """Supplies the input to the model.

    Returns:
      A tuple consisting of 1) a dictionary of tensors whose keys are
      the feature names, and 2) a tensor of target labels if the mode
      is not INFER (and None, otherwise).
    """
    logging.info("Reading files from %s", input_file_pattern)
    input_files = sorted(list(tf.gfile.Glob(input_file_pattern)))
    logging.info("Reading files from %s", input_files)
    include_target_column = (mode != tf.contrib.learn.ModeKeys.INFER)
    features_spec = tf.contrib.layers.create_feature_spec_for_parsing(
        feature_columns=_get_feature_columns(include_target_column))

    if FLAGS.use_gzip:
      def gzip_reader():
        return tf.TFRecordReader(
            options=tf.python_io.TFRecordOptions(
                compression_type=TFRecordCompressionType.GZIP))
      reader_fn = gzip_reader
    else:
      reader_fn = tf.TFRecordReader

    features = tf.contrib.learn.io.read_batch_features(
        file_pattern=input_files,
        batch_size=batch_size,
        queue_capacity=3*batch_size,
        randomize_input=mode == tf.contrib.learn.ModeKeys.TRAIN,
        feature_queue_capacity=FLAGS.feature_queue_capacity,
        reader=reader_fn,
        features=features_spec)
    target = None
    if include_target_column:
      target = features.pop(FLAGS.target_field)
    return features, target

  return _input_fn
项目:cloudml-examples    作者:googlegenomics    | 项目源码 | 文件源码
def _build_model_fn():
  """Build model function.

  Returns:
    A model function that can be passed to `Estimator` constructor.
  """
  def _model_fn(features, labels, mode):
    """Creates the prediction and its loss.

    Args:
      features: A dictionary of tensors keyed by the feature name.
      labels: A tensor representing the labels.
      mode: The execution mode, as defined in tf.contrib.learn.ModeKeys.

    Returns:
      A tuple consisting of the prediction, loss, and train_op.
    """
    # Generate one embedding per sparse feature column and concatenate them.
    concat_embeddings = tf.contrib.layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=_get_feature_columns(include_target_column=False))

    # Add one hidden layer.
    hidden_layer_0 = tf.contrib.layers.relu(
        concat_embeddings, FLAGS.hidden_units)

    # Output and logistic loss.
    logits = tf.contrib.layers.linear(hidden_layer_0, FLAGS.num_classes)

    predictions = tf.contrib.layers.softmax(logits)
    if mode == tf.contrib.learn.ModeKeys.INFER:
      predictions = {
          tf.contrib.learn.PredictionKey.PROBABILITIES: predictions,
          PREDICTION_KEY: features[PREDICTION_KEY]
      }
      output_alternatives = {
          DEFAULT_OUTPUT_ALTERNATIVE: (tf.contrib.learn.ProblemType.UNSPECIFIED,
                                       predictions)
      }
      return model_fn.ModelFnOps(
          mode=mode,
          predictions=predictions,
          output_alternatives=output_alternatives)

    target_one_hot = tf.one_hot(labels, FLAGS.num_classes)
    target_one_hot = tf.reduce_sum(
        input_tensor=target_one_hot, reduction_indices=[1])
    loss = tf.losses.softmax_cross_entropy(target_one_hot, logits)
    if mode == tf.contrib.learn.ModeKeys.EVAL:
      return predictions, loss, None

    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=FLAGS.learning_rate,
        optimizer=opt)
    return model_fn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)

  return _model_fn
项目:cloudml-examples    作者:googlegenomics    | 项目源码 | 文件源码
def _def_experiment(
    train_file_pattern, eval_file_pattern, batch_size):
  """Creates the function used to configure the experiment runner.

  This function creates a function that is used by the learn_runner
  module to create an Experiment.

  Args:
    train_file_pattern: The directory the train data can be found in.
    eval_file_pattern: The directory the test data can be found in.
    batch_size: Batch size

  Returns:
    A function that creates an Experiment object for the runner.
  """

  def _experiment_fn(output_dir):
    """Experiment function used by learn_runner to run training/eval/etc.

    Args:
      output_dir: String path of directory to use for outputs.

    Returns:
      tf.learn `Experiment`.
    """
    estimator = tf.contrib.learn.Estimator(
        model_fn=_build_model_fn(),
        model_dir=output_dir)
    train_input_fn = _build_input_fn(
        input_file_pattern=train_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.TRAIN)
    eval_input_fn = _build_input_fn(
        input_file_pattern=eval_file_pattern,
        batch_size=batch_size,
        mode=tf.contrib.learn.ModeKeys.EVAL)

    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        train_steps=FLAGS.num_train_steps,
        eval_input_fn=eval_input_fn,
        eval_steps=FLAGS.num_eval_steps,
        eval_metrics=_create_evaluation_metrics(),
        min_eval_frequency=100,
        export_strategies=[
            saved_model_export_utils.make_export_strategy(
                _predict_input_fn,
                exports_to_keep=5,
                default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)
        ])

  return _experiment_fn