这里对 TensorFlow 官网 CIFAR-10 示例的代码加以注释。阅读过程中发现很多 API 接口不是很熟悉,与平时使用 placeholder 的例子不一样,所以在 Jupyter Notebook 中重新整理代码并添加注释,以加深对代码的理解。通过单 GPU 和双 GPU 的训练结果对比,发现两者的训练结果没有多大区别。

1)定义输入相关的函数

In [82]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from six.moves import xrange
import tensorflow as tf
from datetime import datetime
import time
import os
import re
import sys
import tarfile
import numpy as np
import math

from six.moves import urllib

#描述cifar10相关的统计量
IMAGE_SIZE = 24
NUM_CLASSES = 10
#每一个EPOCH要训练的样本数
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
#每一个EPOCH要评估的样本数
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

Note: CIFAR10二进制版本的文件包含 data_batch_1.bin, data_batch_2.bin, ..., data_batch_5.bin,还有 test_batch.bin。每个文件的格式如下: (1 x label)(3072 x pixel) ... (1 x label)(3072 x pixel)

1.1)函数read_cifar10从文件队列按照固定长度逐条读取训练样本内容和标签

In [55]:
def read_cifar10(filename_queue):
  """Read and parse one CIFAR-10 example from the filename queue.

  Args:
    filename_queue: a queue of filename strings to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0..9.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
  """

  class CIFAR10Record(object):
    pass
  result = CIFAR10Record()

  # Fixed geometry of the CIFAR-10 binary format: 1 label byte followed by
  # a 32x32x3 image stored depth-major.
  label_bytes = 1
  result.height = 32
  result.width = 32
  result.depth = 3
  image_bytes = result.height * result.width * result.depth
  record_bytes = label_bytes + image_bytes

  # Every record has the same fixed length and the format carries no header
  # or footer, so the reader's default header/footer sizes of 0 are correct.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  # `key` identifies file & record number; `raw_record` is the record string.
  result.key, raw_record = reader.read(filename_queue)

  # Reinterpret the record string as a flat uint8 vector of record_bytes.
  decoded = tf.decode_raw(raw_record, tf.uint8)

  # Byte 0 is the class label; convert uint8 -> int32.
  result.label = tf.cast(tf.strided_slice(decoded, [0], [label_bytes]), tf.int32)

  # The remaining bytes are the pixels, laid out [depth, height, width].
  depth_major = tf.reshape(
      tf.strided_slice(decoded, [label_bytes], [label_bytes + image_bytes]),
      [result.depth, result.height, result.width])
  # Reorder to the conventional [height, width, depth] layout.
  result.uint8image = tf.transpose(depth_major, [1, 2, 0])

  return result

1.2)函数_generate_image_and_label_batch根据image和label生成批次

In [56]:
def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
  """Assemble a queued batch of images and labels.

  Args:
    image: 3-D Tensor of [height, width, 3] of type.float32.
    label: 1-D Tensor of type.int32
    min_queue_examples: int32, minimum number of samples to retain in the
      queue that provides batches of examples.
    batch_size: Number of images per batch.
    shuffle: boolean indicating whether to use a shuffling queue.

  Returns:
    images: Images. 4D tensor of [batch_size, height, width, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  # Both batching ops run this many enqueue threads and keep at least
  # `min_queue_examples` elements buffered, which gives the shuffling
  # queue enough material to mix well.
  num_preprocess_threads = 16
  capacity = min_queue_examples + 3 * batch_size
  if shuffle:
    # shuffle_batch dequeues randomly from a buffer of at least
    # `min_after_dequeue` elements.
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=capacity,
        min_after_dequeue=min_queue_examples)
  else:
    # Same mechanics as shuffle_batch, but preserves input order.
    images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=capacity)

  # Show a sample of the batched images in TensorBoard.
  tf.summary.image('images', images)

  return images, tf.reshape(label_batch, [batch_size])

1.3)函数distorted_inputs_raw处理文件队列并调用1.1和1.2的函数生成批次

In [57]:
def distorted_inputs_raw(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.
  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  # Build the five training-batch file names (data_batch_1..5.bin).
  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in xrange(1, 6)]
  for f in filenames:
    # tf.gfile:
    # 1. Provides a Python file-like API,
    # 2. backed by TensorFlow's C++ file-system layer, which supports
    #    multiple backends (local disk, GCS, HDFS, ...) behind the same
    #    interface, so this existence check works on any of them.
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the 32x32 image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order their operation.
  # NOTE: since per_image_standardization zeros the mean and makes
  # the stddev unit, this likely has no effect see tensorflow#1458.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(distorted_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties: keep at
  # least 40% of one training epoch buffered in the example queue.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)

1.4)函数inputs_raw生成文件队列并调用1.1和1.2生成批次,同时支持测试数据的输入

In [58]:
def inputs_raw(eval_data, data_dir, batch_size):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  # Pick the file list and epoch size for the requested split.
  if eval_data:
    filenames = [os.path.join(data_dir, 'test_batch.bin')]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
  else:
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in xrange(1, 6)]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN

  missing = [f for f in filenames if not tf.gfile.Exists(f)]
  if missing:
    raise ValueError('Failed to find file: ' + missing[0])

  # Queue of filenames feeding the fixed-length record reader.
  filename_queue = tf.train.string_input_producer(filenames)
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  # For evaluation there is no random distortion: just take the central
  # IMAGE_SIZE x IMAGE_SIZE crop of the image.
  resized_image = tf.image.resize_image_with_crop_or_pad(
      reshaped_image, IMAGE_SIZE, IMAGE_SIZE)

  # Normalize each image to zero mean and unit variance.
  float_image = tf.image.per_image_standardization(resized_image)

  # Pin down the static shapes for downstream batching.
  float_image.set_shape([IMAGE_SIZE, IMAGE_SIZE, 3])
  read_input.label.set_shape([1])

  # Keep at least 40% of an epoch buffered so batching behaves well.
  min_queue_examples = int(0.4 * num_examples_per_epoch)

  # Build the (unshuffled) evaluation batch.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=False)

1.5)测试输入函数的效果

In [59]:
def testInput():
    """Smoke-test the input pipeline: pull one batch and print it.

    BUG FIX: the original fetched the tensors with two separate calls,
    ``b_images.eval()`` then ``b_labels.eval()``. Each eval() is its own
    session run and dequeues a *different* batch from the queue, so the
    printed labels did not correspond to the printed images. Fetching
    both in a single ``sess.run([...])`` returns one consistent batch.
    """
    DATA_DIR='/mnt/data/software/models/tutorials/image/cifar10/cifar-10-batches-bin'
    BATCH_SIZE=256
    #b_images,b_labels=distorted_inputs_raw(DATA_DIR, BATCH_SIZE)
    b_images,b_labels=inputs_raw(False,DATA_DIR, BATCH_SIZE)
    with tf.Session() as sess:
        # Coordinator manages the clean shutdown of the queue-runner threads.
        coord=tf.train.Coordinator()
        # Start the enqueue threads for all queue runners in the graph;
        # returns the list of started threads so we can join them later.
        threads=tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(1):
                # One run() -> one dequeued batch; images and labels match.
                images_val, labels_val = sess.run([b_images, b_labels])
                print(images_val)
                print(labels_val)
        finally:
            # Always stop and join the threads, even if run() raises,
            # otherwise the process can hang on the background threads.
            coord.request_stop()
            coord.join(threads)

testInput()
[[[[-0.34085315  0.53541332  1.18863022]
   [-0.42051375  0.43982065  1.10896957]
   [-0.32492104  0.55134547  1.20456231]
   ..., 
   [-0.26119256  0.61507392  1.26829076]
   [-0.27712467  0.59914184  1.25235868]
   [-0.29305679  0.58320969  1.23642659]]

  [[-0.29305679  0.59914184  1.23642659]
   [-0.35678527  0.55134547  1.18863022]
   [-0.34085315  0.55134547  1.18863022]
   ..., 
   [-0.30898893  0.58320969  1.23642659]
   [-0.30898893  0.58320969  1.23642659]
   [-0.30898893  0.56727761  1.23642659]]

  [[-0.65949553  0.32829583  0.90185207]
   [-0.54797071  0.40795642  0.99744481]
   [-0.51610643  0.37609217  0.94964844]
   ..., 
   [-0.32492104  0.58320969  1.25235868]
   [-0.32492104  0.58320969  1.25235868]
   [-0.30898893  0.59914184  1.26829076]]

  ..., 
  [[-0.93034154 -0.73915613 -0.53203857]
   [-0.62763131 -0.54797071 -0.38864949]
   [-0.38864949 -0.46831009 -0.45237797]
   ..., 
   [-1.88626862 -1.10559487 -0.61169916]
   [-1.79067588 -0.7072919  -0.0381429 ]
   [-1.80660808 -0.77102035 -0.14966774]]

  [[-1.21711969 -0.89847732 -0.73915613]
   [-1.45610142 -1.12152696 -0.88254517]
   [-1.0418663  -0.9144094  -0.69135976]
   ..., 
   [-1.9022007  -1.0577985  -0.62763131]
   [-1.94999707 -0.99406999 -0.45237797]
   [-1.99779344 -0.97813791 -0.45237797]]

  [[-1.26491606 -0.86661303 -0.62763131]
   [-1.07373059 -0.8347488  -0.75508821]
   [-0.59576702 -0.26119256  0.07338192]
   ..., 
   [-1.98186135 -1.18525541 -0.85068095]
   [-2.0296576  -1.21711969 -0.77102035]
   [-2.09338617 -1.13745904 -0.64356339]]]


 [[[ 0.23127601 -0.6053977  -1.33526194]
   [ 0.1778713  -0.6053977  -1.33526194]
   [ 0.16006973 -0.58759612 -1.26405573]
   ..., 
   [ 0.28468072 -0.48078671 -1.17504787]
   [ 0.1778713  -0.56979454 -1.24625409]
   [ 0.14226817 -0.56979454 -1.26405573]]

  [[ 0.44489482 -0.40958044 -1.17504787]
   [ 0.355887   -0.48078671 -1.22845256]
   [ 0.32028386 -0.48078671 -1.21065092]
   ..., 
   [ 0.44489482 -0.24936631 -0.83681804]
   [ 0.16006973 -0.55199295 -1.17504787]
   [-0.07135064 -0.76561177 -1.37086511]]

  [[ 0.44489482 -0.40958044 -1.1928494 ]
   [ 0.37368855 -0.49858826 -1.26405573]
   [ 0.19567287 -0.65880239 -1.44207132]
   ..., 
   [ 0.71191835 -0.08915221 -0.74781024]
   [ 0.32028386 -0.42738199 -1.10384154]
   [-0.05354907 -0.76561177 -1.33526194]]

  ..., 
  [[ 0.42709327 -0.3739773  -1.17504787]
   [ 0.49829954 -0.30277103 -1.10384154]
   [ 0.74752146 -0.05354907 -0.76561177]
   ..., 
   [ 0.44489482  0.0176572  -1.05043685]
   [ 0.28468072 -0.10695378 -1.28185725]
   [ 0.1778713  -0.33837417 -1.28185725]]

  [[ 0.19567287 -0.62319922 -1.24625409]
   [ 0.40929168 -0.40958044 -1.10384154]
   [ 0.55170423 -0.23156475 -1.03263533]
   ..., 
   [ 0.56950581 -0.10695378 -1.01483369]
   [ 0.33808541 -0.32057258 -1.1928494 ]
   [ 0.37368855 -0.3739773  -1.15724623]]

  [[ 0.355887   -0.40958044 -1.37086511]
   [ 0.26687914 -0.51638985 -1.35306346]
   [ 0.48049796 -0.30277103 -1.10384154]
   ..., 
   [ 0.67631519 -0.03574751 -0.85461962]
   [ 0.4626964  -0.17816006 -1.01483369]
   [ 0.56950581 -0.07135064 -0.96142906]]]


 [[[ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   ..., 
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]]

  [[ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   ..., 
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]]

  [[ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   [ 1.7729423   1.7729423   1.7729423 ]
   ..., 
   [ 1.76020944  1.74747658  1.7729423 ]
   [ 1.7729423   1.73474371  1.7729423 ]
   [ 1.7729423   1.76020944  1.7729423 ]]

  ..., 
  [[-0.63357127 -0.55717397 -0.5444411 ]
   [-0.63357127 -0.55717397 -0.53170824]
   [-0.74816716 -0.67176986 -0.64630413]
   ..., 
   [-0.73543423 -0.55717397 -0.51897538]
   [-0.72270137 -0.5444411  -0.50624245]
   [-0.73543423 -0.55717397 -0.51897538]]

  [[-0.50624245 -0.40437946 -0.41711235]
   [-0.50624245 -0.39164659 -0.41711235]
   [-0.62083834 -0.50624245 -0.53170824]
   ..., 
   [-0.64630413 -0.46804386 -0.42984521]
   [-0.65903699 -0.48077673 -0.44257811]
   [-0.70996851 -0.53170824 -0.49350962]]

  [[-0.34071508 -0.20065345 -0.23885208]
   [-0.44257811 -0.30251646 -0.35344797]
   [-0.49350962 -0.35344797 -0.40437946]
   ..., 
   [-0.72270137 -0.5444411  -0.50624245]
   [-0.70996851 -0.53170824 -0.49350962]
   [-0.72270137 -0.5444411  -0.50624245]]]


 ..., 
 [[[ 0.79483747  1.21636224  1.46927702]
   [ 0.8116985   1.25008416  1.50299907]
   [ 0.82855946  1.2838062   1.53672099]
   ..., 
   [-0.53718072 -1.02614939 -1.34650826]
   [-0.67206866 -1.00928843 -1.24534225]
   [-1.19475937 -1.1273154  -1.21162033]]

  [[ 0.8116985   1.21636224  1.48613811]
   [ 0.82855946  1.2838062   1.51986003]
   [ 0.84542042  1.31752813  1.55358207]
   ..., 
   [-1.00928843 -1.31278622 -1.5319792 ]
   [-1.16103733 -1.29592526 -1.4813962 ]
   [-1.34650826 -1.22848129 -1.29592526]]

  [[ 0.82855946  1.2332232   1.46927702]
   [ 0.84542042  1.2838062   1.51986003]
   [ 0.86228144  1.33438921  1.55358207]
   ..., 
   [ 0.44075668  0.52506167  0.54192263]
   [ 0.17098086  0.30586877  0.32272977]
   [-0.16623895  0.10353689  0.15411986]]

  ..., 
  [[ 0.33959076  0.32272977 -0.75637358]
   [ 0.37331274  0.39017373 -0.79009557]
   [ 0.37331274  0.42389569 -0.77323461]
   ..., 
   [ 0.05295392  0.42389569 -1.04301047]
   [-0.25054389  0.18784183 -1.2790643 ]
   [-0.21682192  0.27214679 -1.19475937]]

  [[ 0.67681056  0.67681056 -0.53718072]
   [ 0.49133965  0.54192263 -0.73951262]
   [ 0.2552858   0.35645175 -0.9249835 ]
   ..., 
   [ 0.49133965  0.76111549 -0.60462469]
   [ 0.10353689  0.45761767 -0.9249835 ]
   [-0.14937796  0.28900778 -1.11045444]]

  [[ 0.45761767  0.52506167 -0.73951262]
   [ 0.39017373  0.52506167 -0.80695659]
   [ 0.30586877  0.50820065 -0.84067857]
   ..., 
   [ 0.8791424   0.96344739 -0.23368292]
   [ 0.84542042  0.99716938 -0.23368292]
   [ 0.62622756  0.8116985  -0.46973678]]]


 [[[ 0.29025957  0.10075951  0.10075951]
   [ 0.32815957  0.17655954  0.06285951]
   [ 0.06285951 -0.05084052 -0.42984062]
   ..., 
   [-0.27824059 -0.42984062 -1.11204076]
   [-0.35404059 -0.20244056 -1.22574079]
   [-0.16454056  0.0249595  -1.11204076]]

  [[ 0.4039596   0.21445954  0.0249595 ]
   [ 0.4039596   0.21445954  0.17655954]
   [ 0.21445954  0.0249595  -0.12664054]
   ..., 
   [ 0.0249595  -0.05084052 -0.80884069]
   [-0.12664054  0.10075951 -0.92254072]
   [-0.08874054  0.17655954 -0.96044075]]

  [[ 0.7829597   0.66925967  0.10075951]
   [ 0.8587597   0.66925967  0.4418596 ]
   [ 0.4039596   0.21445954  0.0249595 ]
   ..., 
   [-0.08874054 -0.01294051 -0.88464075]
   [-0.16454056  0.10075951 -1.03624082]
   [-0.35404059 -0.08874054 -1.26364088]]

  ..., 
  [[ 1.99575996  1.16195977  0.32815957]
   [ 1.31355977  0.4418596  -0.42984062]
   [ 0.7829597  -0.16454056 -0.96044075]
   ..., 
   [ 2.48846006  1.65465987  0.70715964]
   [ 2.45056009  1.65465987  0.70715964]
   [ 1.84415996  1.16195977  0.17655954]]

  [[ 2.03365993  1.08615971  0.17655954]
   [ 1.46515989  0.55555964 -0.31614059]
   [ 0.82085967  0.0249595  -0.65724069]
   ..., 
   [ 2.52636003  1.7683599   0.89665973]
   [ 2.45056009  1.69255996  0.82085967]
   [ 2.48846006  1.73045993  0.97245973]]

  [[ 1.88205993  1.01035976  0.25235954]
   [ 1.57885993  0.70715964 -0.08874054]
   [ 0.9345597   0.21445954 -0.46774063]
   ..., 
   [ 2.03365993  1.46515989  0.74505967]
   [ 2.26106     1.65465987  0.89665973]
   [ 2.41266012  1.69255996  1.01035976]]]


 [[[ 1.64395726  1.49504554  1.37591624]
   [ 1.37591624  1.25678694  1.25678694]
   [ 0.60157567  0.54201102  0.69092268]
   ..., 
   [ 0.78026968  0.54201102  0.69092268]
   [ 0.69092268  0.42288172  0.57179338]
   [ 0.89939898  0.63135803  0.78026968]]

  [[ 1.40569854  1.22700465  1.10787523]
   [ 1.49504554  1.34613395  1.34613395]
   [ 1.52482784  1.43548095  1.58439255]
   ..., 
   [ 0.63135803  0.36331704  0.48244637]
   [ 0.57179338  0.33353472  0.48244637]
   [ 0.72070503  0.51222873  0.66114032]]

  [[ 0.45266405  0.24418774  0.09527609]
   [ 0.66114032  0.48244637  0.45266405]
   [ 1.55461025  1.40569854  1.55461025]
   ..., 
   [ 0.95896363  0.69092268  0.75048733]
   [ 0.75048733  0.54201102  0.66114032]
   [ 0.48244637  0.36331704  0.51222873]]

  ..., 
  [[-1.18536401 -1.24492872 -1.57253432]
   [-1.03645241 -1.21514642 -1.51296961]
   [-0.4110235  -0.7386291  -1.00667012]
   ..., 
   [-0.38124117 -0.79819375 -1.00667012]
   [-0.05363555 -0.29189417 -0.38124117]
   [-1.15558171 -1.27471101 -1.36405802]]

  [[-1.03645241 -1.21514642 -1.60231662]
   [-1.03645241 -1.24492872 -1.60231662]
   [-0.7386291  -1.03645241 -1.33427572]
   ..., 
   [-0.11320021 -0.47058815 -0.55993515]
   [-0.38124117 -0.5301528  -0.5301528 ]
   [-0.91732311 -1.06623471 -1.03645241]]

  [[-0.5003705  -0.70884681 -0.97688776]
   [-0.8577584  -1.09601712 -1.33427572]
   [-0.67906445 -0.94710541 -1.24492872]
   ..., 
   [ 0.06549377 -0.23232952 -0.29189417]
   [ 0.21440542  0.06549377  0.12505843]
   [ 0.15484075  0.00592911  0.06549377]]]]
[1 3 5 4 1 3 8 0 1 9 3 4 7 9 2 9 4 9 9 0 1 5 9 2 0 8 2 3 0 5 1 6 2 7 8 0 8
 7 6 9 4 5 2 6 9 1 5 2 0 9 4 1 1 1 3 1 4 9 7 3 0 6 7 9 1 8 7 5 9 6 8 6 1 1
 7 0 3 3 5 2 7 4 3 0 3 0 4 9 6 0 4 6 4 5 6 0 0 8 6 6 1 7 4 5 9 2 9 2 8 1 7
 3 9 0 4 3 3 5 4 5 1 0 4 7 7 6 7 1 9 8 6 3 2 4 7 0 0 7 4 4 1 8 2 0 2 0 3 1
 9 2 0 8 7 6 7 9 8 4 0 2 0 7 9 2 5 0 2 4 1 5 5 8 9 4 1 4 2 7 0 3 6 0 8 7 9
 8 9 3 3 4 4 6 4 1 6 0 9 8 0 0 5 8 0 4 8 6 8 1 5 2 0 7 7 1 9 5 0 6 2 3 8 7
 4 3 9 1 0 2 8 4 6 8 1 5 1 1 0 3 9 4 7 8 4 7 2 8 8 5 1 8 0 8 9 2 4 7]

2)定义网络

In [60]:
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1       # Initial learning rate.

# If a model is trained with multiple GPUs, prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'
# URL of the CIFAR-10 binary archive.
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
# Local directory containing the extracted data set.
DATA_DIR='/mnt/data/software/models/tutorials/image/cifar10'
# Number of examples per training batch.
BATCH_SIZE=512

2.1)定义一些辅助函数

  • _activation_summary 函数创建activations的可视化信息
  • _variable_on_cpu 在CPU上通过所给的名字创建或是返回一个变量
  • _variable_with_weight_decay在CPU上通过所给的名字创建或是返回一个变量,同时支持加入L2损失
In [61]:
def _activation_summary(x):
  """Attach TensorBoard summaries to an activation tensor.

  Adds a histogram summary of the activations and a scalar summary of
  their sparsity (fraction of zeros).

  Args:
    x: Tensor
  Returns:
    nothing
  """
  # Strip any 'tower_N/' prefix added during multi-GPU training so the
  # same layer shows up under one name on TensorBoard.
  tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
  tf.summary.histogram(tensor_name + '/activations', x)
  # zero_fraction reports how many activations are exactly zero.
  tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
    
    
def _variable_on_cpu(name, shape, initializer):
  """Create (or retrieve) a float32 variable pinned to CPU memory.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for the variable

  Returns:
    Variable Tensor
  """
  # Keeping variables on the CPU lets multiple GPU towers share them.
  with tf.device('/cpu:0'):
    return tf.get_variable(name, shape, initializer=initializer,
                           dtype=tf.float32)


def _variable_with_weight_decay(name, shape, stddev, wd):
  """Create a truncated-normal-initialized variable with optional L2 decay.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of the truncated Gaussian initializer
    wd: add L2Loss weight decay multiplied by this float. If None, weight
      decay is not added for this Variable.

  Returns:
    Variable Tensor
  """
  initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
  var = _variable_on_cpu(name, shape, initializer)
  if wd is not None:
    # l2_loss computes sum(var**2)/2 (no sqrt); tf.multiply scales it
    # element-wise by wd, giving this variable's share of the total loss.
    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    # Collect the decay terms in the 'losses' collection; loss() later
    # adds them to the data loss with tf.add_n.
    tf.add_to_collection('losses', weight_decay)
  return var

2.2)distorted_input是调用distorted_inputs_raw的简化版输入

In [62]:
def distorted_inputs():
  """Construct distorted input for CIFAR training using the Reader ops.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not DATA_DIR:
    raise ValueError('Please supply a data_dir')
  # The binary batches live in the 'cifar-10-batches-bin' subdirectory.
  return distorted_inputs_raw(
      data_dir=os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
      batch_size=BATCH_SIZE)

2.3)inputs是调用inputs_raw的简化版输入

In [63]:
#调用inputs(eval_data, data_dir, batch_size)
def inputs(eval_data):
  """Construct input for CIFAR evaluation using the Reader ops.

  Thin wrapper around inputs_raw() using the module-level DATA_DIR and
  BATCH_SIZE settings.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not DATA_DIR:
    raise ValueError('Please supply a data_dir')
  # The binary batches live in the 'cifar-10-batches-bin' subdirectory.
  return inputs_raw(eval_data=eval_data,
                    data_dir=os.path.join(DATA_DIR, 'cifar-10-batches-bin'),
                    batch_size=BATCH_SIZE)

2.4)inference在CPU上构建神经网络模型

In [64]:
def inference(images):
  """Build the CIFAR-10 model.

  Architecture: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
  local3 (FC) -> local4 (FC) -> softmax_linear (logits).

  Args:
    images: Images returned from distorted_inputs() or inputs().

  Returns:
    Logits.
  """
  # We instantiate all variables using tf.get_variable() instead of
  # tf.Variable() in order to share variables across multiple GPU training runs.
  # If we only ran this model on a single GPU, we could simplify this function
  # by replacing all instances of tf.get_variable() with tf.Variable().
  #
  # conv1
  with tf.variable_scope('conv1') as scope:
    # wd=0.0 means the added L2 term is multiplied by zero, i.e. the conv
    # weights are effectively not regularized.
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 3, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)

  # pool1
  pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                         padding='SAME', name='pool1')
  # norm1: local response normalization across adjacent channels.
  norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm1')

  # conv2
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # norm2 (note: applied before pooling here, the reverse of layer 1)
  norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm2')
  # pool2
  pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1], padding='SAME', name='pool2')

  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    reshape = tf.reshape(pool2, [BATCH_SIZE, -1])
    dim = reshape.get_shape()[1].value
    # wd=0.004: L2 weight decay IS applied to the fully-connected weights.
    weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    _activation_summary(local3)

  # local4
  with tf.variable_scope('local4') as scope:
    # wd=0.004: L2 weight decay IS applied to the fully-connected weights.
    weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    _activation_summary(local4)

  # linear layer(WX + b),
  # We don't apply softmax here because
  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                          stddev=1/192.0, wd=0.0)
    biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
    _activation_summary(softmax_linear)

  return softmax_linear

2.5)计算前馈传播总损失

In [65]:
def loss(logits, labels):
  """Combine the cross-entropy data loss with all collected L2 penalties.

  Registers the mean cross-entropy in the 'losses' collection, then sums
  it with every weight-decay term added by _variable_with_weight_decay().

  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor of
      shape [batch_size]

  Returns:
    Loss tensor of type float.
  """
  # sparse_softmax_cross_entropy_with_logits wants int64 class ids and
  # applies the softmax internally, so logits stay unscaled.
  labels = tf.cast(labels, tf.int64)
  per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name='cross_entropy_per_example')
  mean_cross_entropy = tf.reduce_mean(per_example, name='cross_entropy')
  tf.add_to_collection('losses', mean_cross_entropy)

  # Total loss = data loss + every L2 weight-decay term in the collection.
  return tf.add_n(tf.get_collection('losses'), name='total_loss')

2.6)summary损失信息

In [66]:
def _add_loss_summaries(total_loss):
  """Track moving averages of all losses and attach summaries for them.

  Args:
    total_loss: Total loss from loss().
  Returns:
    loss_averages_op: op for generating moving averages of losses.
  """
  # ExponentialMovingAverage keeps a shadow copy of every tracked tensor;
  # apply() returns the op that refreshes the shadows, and average()
  # retrieves a shadow after the fact.
  loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
  losses = tf.get_collection('losses')
  tracked = losses + [total_loss]
  loss_averages_op = loss_averages.apply(tracked)

  # Summarize both the raw value and the smoothed version of each loss:
  # '(raw)' carries the instantaneous value, the plain name the average.
  for l in tracked:
    tf.summary.scalar(l.op.name + ' (raw)', l)
    tf.summary.scalar(l.op.name, loss_averages.average(l))

  return loss_averages_op

2.7)定义优化函数及应用到所有的可训练变量上

In [67]:
def train_raw(total_loss, global_step):
  """Build the training op for the CIFAR-10 model.

  Creates a gradient-descent optimizer with an exponentially decaying
  learning rate, applies it to all trainable variables, and maintains
  moving averages of those variables.

  Args:
    total_loss: Total loss from loss().
    global_step: Integer Variable counting processed training steps.

  Returns:
    train_op: op that performs one training step.
  """
  # Learning-rate schedule: decay once every NUM_EPOCHS_PER_DECAY epochs.
  batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / BATCH_SIZE
  decay_steps = int(batches_per_epoch * NUM_EPOCHS_PER_DECAY)

  # staircase=True makes (global_step / decay_steps) an integer division,
  # i.e. the rate drops in discrete steps rather than continuously.
  lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                  global_step,
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
  tf.summary.scalar('learning_rate', lr)

  # Moving averages of all losses, plus their summaries.
  loss_averages_op = _add_loss_summaries(total_loss)

  # control_dependencies guarantees the loss averages are refreshed
  # before the gradients for this step are computed.
  with tf.control_dependencies([loss_averages_op]):
    opt = tf.train.GradientDescentOptimizer(lr)
    # compute_gradients() defaults to GraphKeys.TRAINABLE_VARIABLES and
    # returns a list of (gradient, variable) pairs.
    grads = opt.compute_gradients(total_loss)

  # Second half of minimize(): apply the computed gradients.
  apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

  # Histograms for the trainable variables and their gradients.
  for var in tf.trainable_variables():
    tf.summary.histogram(var.op.name, var)
  for grad, var in grads:
    if grad is not None:
      tf.summary.histogram(var.op.name + '/gradients', grad)

  # Track moving averages of all trainable variables.
  variable_averages = tf.train.ExponentialMovingAverage(
      MOVING_AVERAGE_DECAY, global_step)
  variables_averages_op = variable_averages.apply(tf.trainable_variables())

  # no_op does nothing itself; its control dependencies force both the
  # gradient application and the moving-average update to run.
  with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train')

  return train_op
In [68]:
def maybe_download_and_extract():
  """Download and extract the CIFAR-10 binary tarball from Alex's website.

  Downloads DATA_URL into DATA_DIR (skipped when the archive already
  exists) and extracts it when the extracted directory is missing.
  """
  dest_directory = DATA_DIR
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):
    def _progress(count, block_size, total_size):
      # In-place console progress indicator for the download.
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
          float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()
    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
  if not os.path.exists(extracted_dir_path):
    # Use a context manager so the tar file handle is closed even if
    # extraction raises (the original leaked the open TarFile).
    with tarfile.open(filepath, 'r:gz') as tar:
      tar.extractall(dest_directory)

3)在CPU或者GPU上训练模型

In [69]:
# ---- Training configuration shared by the runs below ----
# Directory for checkpoints and event/summary files.
train_dir='/mnt/data/software/models/tutorials/image/cifar10/train'
# Total number of training steps to run.
max_steps=10000
# Whether the session logs which device each op is placed on.
log_device_placement=True
# Steps between console loss/throughput reports (used by _LoggerHook).
log_frequency=500
# Number of GPU towers used by train_mgpu().
num_gpus=2
In [70]:
def train_gpu():
  """Train CIFAR-10 for a number of steps on a single device."""
  with tf.Graph().as_default():
    # Fetch (or create) the global step counter tensor.
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Pin the input pipeline to the CPU so preprocessing ops never land
    # on the GPU, where they would slow training down.
    with tf.device('/cpu:0'):
      images, labels = distorted_inputs()

    # Forward pass: logits from the inference graph.
    logits = inference(images)

    # Loss for this batch.
    los = loss(logits, labels)

    # One training step: applies gradients and updates model parameters.
    train_op = train_raw(los, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
      """SessionRunHook that periodically logs loss and throughput."""

      def begin(self):
        # Called once before the session is used.
        self._step = -1
        self._start_time = time.time()

      def before_run(self, run_context):
        # Called before every run(); also request the loss value.
        self._step += 1
        return tf.train.SessionRunArgs(los)

      def after_run(self, run_context, run_values):
        # Called after every run(); run_values holds the requested loss.
        if self._step % log_frequency != 0:
          return
        now = time.time()
        duration = now - self._start_time
        self._start_time = now

        loss_value = run_values.results
        examples_per_sec = log_frequency * BATCH_SIZE / duration
        sec_per_batch = float(duration / log_frequency)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), self._step, loss_value,
                             examples_per_sec, sec_per_batch))

    # MonitoredTrainingSession handles checkpointing/summaries plus:
    #  - StopAtStepHook: requests a stop once max_steps is reached,
    #  - NanTensorHook: aborts training when the loss becomes NaN.
    hooks = [tf.train.StopAtStepHook(last_step=max_steps),
             tf.train.NanTensorHook(los),
             _LoggerHook()]
    session_config = tf.ConfigProto(log_device_placement=log_device_placement)
    with tf.train.MonitoredTrainingSession(
        checkpoint_dir=train_dir,
        hooks=hooks,
        config=session_config) as mon_sess:
      while not mon_sess.should_stop():
        mon_sess.run(train_op)
In [71]:
# Ensure the dataset is present on disk before building the graph.
maybe_download_and_extract()
# Start from a clean train_dir so no stale checkpoints/summaries remain.
if tf.gfile.Exists(train_dir):
    tf.gfile.DeleteRecursively(train_dir)
tf.gfile.MakeDirs(train_dir)
train_gpu()
Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.
INFO:tensorflow:Summary name conv1/weight_loss (raw) is illegal; using conv1/weight_loss__raw_ instead.
INFO:tensorflow:Summary name conv2/weight_loss (raw) is illegal; using conv2/weight_loss__raw_ instead.
INFO:tensorflow:Summary name local3/weight_loss (raw) is illegal; using local3/weight_loss__raw_ instead.
INFO:tensorflow:Summary name local4/weight_loss (raw) is illegal; using local4/weight_loss__raw_ instead.
INFO:tensorflow:Summary name softmax_linear/weight_loss (raw) is illegal; using softmax_linear/weight_loss__raw_ instead.
INFO:tensorflow:Summary name cross_entropy (raw) is illegal; using cross_entropy__raw_ instead.
INFO:tensorflow:Summary name total_loss (raw) is illegal; using total_loss__raw_ instead.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /mnt/data/software/models/tutorials/image/cifar10/train/model.ckpt.
2017-10-21 11:11:00.313941: step 0, loss = 4.68 (48897.8 examples/sec; 0.010 sec/batch)
INFO:tensorflow:global_step/sec: 10.6067
INFO:tensorflow:global_step/sec: 10.1723
INFO:tensorflow:global_step/sec: 10.7315
INFO:tensorflow:global_step/sec: 10.7466
INFO:tensorflow:global_step/sec: 10.9881
2017-10-21 11:11:47.164544: step 500, loss = 3.15 (5464.2 examples/sec; 0.094 sec/batch)
INFO:tensorflow:global_step/sec: 11.0169
INFO:tensorflow:global_step/sec: 10.8688
INFO:tensorflow:global_step/sec: 10.2655
INFO:tensorflow:global_step/sec: 10.1304
INFO:tensorflow:global_step/sec: 10.8203
2017-10-21 11:12:34.297021: step 1000, loss = 2.23 (5431.5 examples/sec; 0.094 sec/batch)
INFO:tensorflow:global_step/sec: 10.9515
INFO:tensorflow:global_step/sec: 11.0193
INFO:tensorflow:global_step/sec: 10.3249
INFO:tensorflow:global_step/sec: 10.3802
INFO:tensorflow:global_step/sec: 10.6816
2017-10-21 11:13:21.186436: step 1500, loss = 1.81 (5459.7 examples/sec; 0.094 sec/batch)
INFO:tensorflow:global_step/sec: 10.9245
INFO:tensorflow:global_step/sec: 11.2029
INFO:tensorflow:global_step/sec: 10.5203
INFO:tensorflow:global_step/sec: 10.9134
INFO:tensorflow:global_step/sec: 10.9655
2017-10-21 11:14:07.051313: step 2000, loss = 1.51 (5581.6 examples/sec; 0.092 sec/batch)
INFO:tensorflow:global_step/sec: 11.2244
INFO:tensorflow:global_step/sec: 10.9971
INFO:tensorflow:global_step/sec: 11.209
INFO:tensorflow:global_step/sec: 10.4449
INFO:tensorflow:global_step/sec: 10.5995
2017-10-21 11:14:52.986951: step 2500, loss = 1.22 (5573.0 examples/sec; 0.092 sec/batch)
INFO:tensorflow:global_step/sec: 11.1119
INFO:tensorflow:global_step/sec: 11.1318
INFO:tensorflow:global_step/sec: 11.1929
INFO:tensorflow:global_step/sec: 11.1335
INFO:tensorflow:global_step/sec: 10.976
2017-10-21 11:15:37.993686: step 3000, loss = 1.07 (5688.0 examples/sec; 0.090 sec/batch)
INFO:tensorflow:global_step/sec: 9.9205
INFO:tensorflow:global_step/sec: 10.3692
INFO:tensorflow:global_step/sec: 10.8072
INFO:tensorflow:global_step/sec: 11.033
INFO:tensorflow:global_step/sec: 11.1402
2017-10-21 11:16:25.013712: step 3500, loss = 1.08 (5444.5 examples/sec; 0.094 sec/batch)
INFO:tensorflow:global_step/sec: 10.2826
INFO:tensorflow:global_step/sec: 10.6134
INFO:tensorflow:global_step/sec: 11.1899
INFO:tensorflow:global_step/sec: 10.7897
INFO:tensorflow:global_step/sec: 11.2282
2017-10-21 11:17:11.269204: step 4000, loss = 0.84 (5534.5 examples/sec; 0.093 sec/batch)
INFO:tensorflow:global_step/sec: 10.2857
INFO:tensorflow:global_step/sec: 10.7948
INFO:tensorflow:global_step/sec: 11.2661
INFO:tensorflow:global_step/sec: 10.7136
INFO:tensorflow:global_step/sec: 10.5
2017-10-21 11:17:57.989157: step 4500, loss = 0.94 (5479.5 examples/sec; 0.093 sec/batch)
INFO:tensorflow:global_step/sec: 10.8856
INFO:tensorflow:global_step/sec: 11.0833
INFO:tensorflow:global_step/sec: 11.2332
INFO:tensorflow:global_step/sec: 10.8887
INFO:tensorflow:global_step/sec: 10.6498
2017-10-21 11:18:43.673765: step 5000, loss = 0.82 (5603.6 examples/sec; 0.091 sec/batch)
INFO:tensorflow:global_step/sec: 11.1172
INFO:tensorflow:global_step/sec: 11.1057
INFO:tensorflow:global_step/sec: 11.124
INFO:tensorflow:global_step/sec: 11.0881
INFO:tensorflow:global_step/sec: 10.9234
2017-10-21 11:19:28.837949: step 5500, loss = 0.80 (5668.2 examples/sec; 0.090 sec/batch)
INFO:tensorflow:global_step/sec: 11.1556
INFO:tensorflow:global_step/sec: 10.6786
INFO:tensorflow:global_step/sec: 11.0468
INFO:tensorflow:global_step/sec: 10.8096
INFO:tensorflow:global_step/sec: 11.1995
2017-10-21 11:20:14.396978: step 6000, loss = 0.88 (5619.1 examples/sec; 0.091 sec/batch)
INFO:tensorflow:global_step/sec: 11.1761
INFO:tensorflow:global_step/sec: 11.2345
INFO:tensorflow:global_step/sec: 11.2473
INFO:tensorflow:global_step/sec: 10.8391
INFO:tensorflow:global_step/sec: 11.2359
2017-10-21 11:20:59.263157: step 6500, loss = 0.77 (5705.9 examples/sec; 0.090 sec/batch)
INFO:tensorflow:Saving checkpoints for 6513 into /mnt/data/software/models/tutorials/image/cifar10/train/model.ckpt.
INFO:tensorflow:global_step/sec: 10.7903
INFO:tensorflow:global_step/sec: 11.2133
INFO:tensorflow:global_step/sec: 10.7675
INFO:tensorflow:global_step/sec: 10.9497
INFO:tensorflow:global_step/sec: 10.7757
2017-10-21 11:21:45.151635: step 7000, loss = 0.80 (5578.7 examples/sec; 0.092 sec/batch)
INFO:tensorflow:global_step/sec: 10.5951
INFO:tensorflow:global_step/sec: 10.8205
INFO:tensorflow:global_step/sec: 10.9916
INFO:tensorflow:global_step/sec: 11.2841
INFO:tensorflow:global_step/sec: 11.1756
2017-10-21 11:22:30.740643: step 7500, loss = 0.92 (5615.4 examples/sec; 0.091 sec/batch)
INFO:tensorflow:global_step/sec: 10.4674
INFO:tensorflow:global_step/sec: 11.0555
INFO:tensorflow:global_step/sec: 11.052
INFO:tensorflow:global_step/sec: 11.2152
INFO:tensorflow:global_step/sec: 10.5285
2017-10-21 11:23:16.800253: step 8000, loss = 0.71 (5558.0 examples/sec; 0.092 sec/batch)
INFO:tensorflow:global_step/sec: 11.2545
INFO:tensorflow:global_step/sec: 10.8677
INFO:tensorflow:global_step/sec: 10.5373
INFO:tensorflow:global_step/sec: 11.1849
INFO:tensorflow:global_step/sec: 11.1015
2017-10-21 11:24:02.323744: step 8500, loss = 0.73 (5623.5 examples/sec; 0.091 sec/batch)
INFO:tensorflow:global_step/sec: 11.3278
INFO:tensorflow:global_step/sec: 10.6042
INFO:tensorflow:global_step/sec: 10.8687
INFO:tensorflow:global_step/sec: 11.0932
INFO:tensorflow:global_step/sec: 11.2781
2017-10-21 11:24:47.666013: step 9000, loss = 0.72 (5646.0 examples/sec; 0.091 sec/batch)
INFO:tensorflow:global_step/sec: 11.274
INFO:tensorflow:global_step/sec: 11.2455
INFO:tensorflow:global_step/sec: 10.589
INFO:tensorflow:global_step/sec: 11.1311
INFO:tensorflow:global_step/sec: 11.352
2017-10-21 11:25:32.664621: step 9500, loss = 0.70 (5689.1 examples/sec; 0.090 sec/batch)
INFO:tensorflow:global_step/sec: 11.1086
INFO:tensorflow:global_step/sec: 11.3257
INFO:tensorflow:global_step/sec: 10.7697
INFO:tensorflow:global_step/sec: 11.2971
INFO:tensorflow:Saving checkpoints for 10000 into /mnt/data/software/models/tutorials/image/cifar10/train/model.ckpt.

4)在多GPU上训练

4.1)各个GPU分别计算损失

In [74]:
def tower_loss(scope, images, labels):
  """Compute the total loss for a single tower of the CIFAR model.

  Args:
    scope: unique prefix string identifying the tower, e.g. 'tower_0'.
    images: Images. 4-D tensor of shape [batch_size, height, width, 3].
    labels: Labels. 1-D tensor of shape [batch_size].

  Returns:
    Scalar tensor containing the total loss for one batch of data.
  """
  # Build the inference graph. loss() registers its terms on the
  # 'losses' collection, so its return value is not needed here.
  logits = inference(images)
  _ = loss(logits, labels)

  # Collect only the loss terms that were created inside this tower's
  # name scope, then sum them into the tower's total loss.
  tower_losses = tf.get_collection('losses', scope)
  total_loss = tf.add_n(tower_losses, name='total_loss')

  for l in tower_losses + [total_loss]:
    # Strip the 'tower_N/' prefix so multi-GPU runs show clean summary
    # names on TensorBoard.
    loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
    tf.summary.scalar(loss_name, l)

  return total_loss

4.2)计算跨多个GPU共享变量的平均梯度

In [73]:
def average_gradients(tower_grads):
  """Average each shared variable's gradient across all towers.

  Note that this function provides a synchronization point across all
  towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The outer
      list is over towers; each inner list is that tower's gradients.

  Returns:
    List of (gradient, variable) pairs where the gradient has been
    averaged across all towers.
  """
  averaged = []
  # zip(*...) regroups the input: one tuple per variable, containing that
  # variable's (grad, var) pair from every tower, e.g.
  # ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
  for per_var in zip(*tower_grads):
    # Give each tower's gradient a new leading 'tower' axis, stack them
    # along it, and reduce that axis away with a mean.
    stacked = tf.concat(
        axis=0, values=[tf.expand_dims(g, 0) for g, _ in per_var])
    mean_grad = tf.reduce_mean(stacked, 0)

    # The Variables are redundant (shared across towers), so returning
    # the first tower's pointer suffices.
    averaged.append((mean_grad, per_var[0][1]))
  return averaged

4.3)定义多GPU训练的函数

In [77]:
def train_mgpu():
  """Train CIFAR-10 for a number of steps across multiple GPU towers.

  Builds one model replica ('tower') per GPU, averages the gradients on
  the CPU, and applies the averaged update to the variables shared by
  all towers. Uses the module-level config: num_gpus, max_steps,
  train_dir, log_device_placement.
  """
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             BATCH_SIZE)
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps
    # (same schedule as in train_raw above).
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)

    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)

    # Get images and labels for CIFAR-10.
    images, labels = distorted_inputs()

    # prefetch_queue pre-assembles input batches so ops that dequeue from
    # it (one per tower below) do not pay the cost of assembling a batch.
    # dequeue() yields tensors with the same types/shapes as [images, labels].

    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * num_gpus)
    # Calculate the gradients for each model tower.
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
      for i in xrange(num_gpus):
        with tf.device('/gpu:%d' % i):
          with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
            # Dequeues one batch for the GPU
            image_batch, label_batch = batch_queue.dequeue()
            # Calculate the loss for one tower of the CIFAR model. This function
            # constructs the entire CIFAR model but shares the variables across
            # all towers. NOTE: this rebinding shadows the module-level
            # loss() function inside train_mgpu; after the loop, `loss` is
            # the LAST tower's loss tensor and is used for logging below.
            loss = tower_loss(scope, image_batch, label_batch)

            # Reuse variables for the next tower.
            tf.get_variable_scope().reuse_variables()

            # Retain the summaries from the final tower.
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

            # Calculate the gradients for the batch of data on this CIFAR tower.
            grads = opt.compute_gradients(loss)

            # Keep track of the gradients across all towers.
            tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.summary.scalar('learning_rate', lr))

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))

    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.summary.histogram(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.summary.merge(summaries)

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)

    for step in xrange(max_steps):
      start_time = time.time()
      # `loss` is the last tower's loss tensor (bound in the loop above).
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = BATCH_SIZE * num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / num_gpus

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == max_steps:
        checkpoint_path = os.path.join(train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
In [78]:
# Ensure the dataset is present on disk before building the graph.
maybe_download_and_extract()
# Start from a clean train_dir so no stale checkpoints/summaries remain.
if tf.gfile.Exists(train_dir):
    tf.gfile.DeleteRecursively(train_dir)
tf.gfile.MakeDirs(train_dir)
train_mgpu()
Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.
2017-10-21 12:30:41.051537: step 0, loss = 4.68 (277.8 examples/sec; 1.843 sec/batch)
2017-10-21 12:30:42.736577: step 10, loss = 4.61 (17847.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:30:43.386587: step 20, loss = 4.48 (11746.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:30:44.277388: step 30, loss = 4.33 (11253.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:30:45.165070: step 40, loss = 4.34 (11166.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:30:46.068972: step 50, loss = 4.33 (11763.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:30:46.953018: step 60, loss = 4.15 (11901.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:30:47.854777: step 70, loss = 4.26 (11150.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:30:48.740744: step 80, loss = 4.17 (11258.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:30:49.625224: step 90, loss = 4.13 (11899.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:30:50.510402: step 100, loss = 4.05 (12672.4 examples/sec; 0.040 sec/batch)
2017-10-21 12:30:51.856481: step 110, loss = 3.95 (16174.5 examples/sec; 0.032 sec/batch)
2017-10-21 12:30:52.460184: step 120, loss = 3.99 (15366.8 examples/sec; 0.033 sec/batch)
2017-10-21 12:30:53.354550: step 130, loss = 3.96 (10599.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:30:54.220917: step 140, loss = 3.96 (11590.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:30:55.099786: step 150, loss = 4.11 (11369.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:30:55.973442: step 160, loss = 3.85 (11524.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:30:56.829284: step 170, loss = 3.81 (12463.8 examples/sec; 0.041 sec/batch)
2017-10-21 12:30:57.690481: step 180, loss = 3.78 (12182.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:30:58.571223: step 190, loss = 3.76 (11939.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:30:59.436424: step 200, loss = 3.68 (11950.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:00.775563: step 210, loss = 4.02 (18140.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:31:01.381243: step 220, loss = 3.71 (17243.5 examples/sec; 0.030 sec/batch)
2017-10-21 12:31:02.299562: step 230, loss = 3.78 (10510.6 examples/sec; 0.049 sec/batch)
2017-10-21 12:31:03.165493: step 240, loss = 3.71 (11497.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:04.031698: step 250, loss = 3.59 (11979.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:04.916319: step 260, loss = 3.77 (11444.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:05.780762: step 270, loss = 3.75 (11861.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:06.675217: step 280, loss = 3.53 (10389.0 examples/sec; 0.049 sec/batch)
2017-10-21 12:31:07.532295: step 290, loss = 3.60 (13552.7 examples/sec; 0.038 sec/batch)
2017-10-21 12:31:08.384027: step 300, loss = 3.60 (12736.2 examples/sec; 0.040 sec/batch)
2017-10-21 12:31:09.722282: step 310, loss = 3.38 (17633.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:31:10.323860: step 320, loss = 3.44 (14633.0 examples/sec; 0.035 sec/batch)
2017-10-21 12:31:11.183405: step 330, loss = 3.47 (11933.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:12.045533: step 340, loss = 3.36 (12386.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:31:12.941498: step 350, loss = 3.32 (11907.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:13.813814: step 360, loss = 3.55 (11216.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:31:14.690096: step 370, loss = 3.42 (12935.4 examples/sec; 0.040 sec/batch)
2017-10-21 12:31:15.580775: step 380, loss = 3.33 (10578.9 examples/sec; 0.048 sec/batch)
2017-10-21 12:31:16.443715: step 390, loss = 3.37 (11890.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:17.320530: step 400, loss = 3.31 (11147.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:31:18.624618: step 410, loss = 3.20 (18593.7 examples/sec; 0.028 sec/batch)
2017-10-21 12:31:19.281180: step 420, loss = 3.24 (16016.3 examples/sec; 0.032 sec/batch)
2017-10-21 12:31:20.179468: step 430, loss = 3.23 (11643.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:31:21.028476: step 440, loss = 3.48 (11502.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:21.903108: step 450, loss = 3.24 (11511.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:31:22.784281: step 460, loss = 3.20 (10399.4 examples/sec; 0.049 sec/batch)
2017-10-21 12:31:23.659360: step 470, loss = 2.99 (12683.7 examples/sec; 0.040 sec/batch)
2017-10-21 12:31:24.537137: step 480, loss = 3.19 (12140.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:25.429671: step 490, loss = 3.04 (12071.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:26.291773: step 500, loss = 3.15 (12110.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:27.586351: step 510, loss = 3.08 (18321.1 examples/sec; 0.028 sec/batch)
2017-10-21 12:31:28.193427: step 520, loss = 3.17 (13535.9 examples/sec; 0.038 sec/batch)
2017-10-21 12:31:29.069402: step 530, loss = 2.99 (11974.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:29.949672: step 540, loss = 2.94 (11349.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:30.829899: step 550, loss = 2.93 (11295.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:31.748512: step 560, loss = 3.04 (11457.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:32.629241: step 570, loss = 3.11 (11472.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:33.500251: step 580, loss = 3.08 (12165.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:34.372527: step 590, loss = 3.02 (11116.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:31:35.248458: step 600, loss = 2.81 (11778.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:36.544254: step 610, loss = 2.96 (17384.9 examples/sec; 0.029 sec/batch)
2017-10-21 12:31:37.179706: step 620, loss = 2.86 (10917.4 examples/sec; 0.047 sec/batch)
2017-10-21 12:31:38.066097: step 630, loss = 2.79 (11940.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:38.937556: step 640, loss = 2.79 (12178.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:39.803705: step 650, loss = 2.86 (12457.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:31:40.698726: step 660, loss = 2.78 (10751.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:31:41.564566: step 670, loss = 2.70 (11087.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:31:42.473679: step 680, loss = 2.73 (11225.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:31:43.336792: step 690, loss = 2.81 (12237.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:44.211129: step 700, loss = 2.78 (11407.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:45.487144: step 710, loss = 2.60 (18198.3 examples/sec; 0.028 sec/batch)
2017-10-21 12:31:46.101378: step 720, loss = 2.70 (16278.1 examples/sec; 0.031 sec/batch)
2017-10-21 12:31:46.977315: step 730, loss = 2.76 (11954.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:47.852571: step 740, loss = 2.68 (11975.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:48.725336: step 750, loss = 2.75 (11363.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:49.590120: step 760, loss = 2.62 (12551.2 examples/sec; 0.041 sec/batch)
2017-10-21 12:31:50.467413: step 770, loss = 2.49 (11989.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:31:51.342112: step 780, loss = 2.84 (12308.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:52.239727: step 790, loss = 2.60 (11351.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:53.145806: step 800, loss = 2.60 (12410.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:31:54.443406: step 810, loss = 2.51 (19105.3 examples/sec; 0.027 sec/batch)
2017-10-21 12:31:55.057838: step 820, loss = 2.56 (12130.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:31:55.916397: step 830, loss = 2.51 (12374.2 examples/sec; 0.041 sec/batch)
2017-10-21 12:31:56.787940: step 840, loss = 2.48 (11382.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:57.657210: step 850, loss = 2.60 (11285.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:31:58.555715: step 860, loss = 2.43 (10989.3 examples/sec; 0.047 sec/batch)
2017-10-21 12:31:59.432113: step 870, loss = 2.56 (12120.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:00.309274: step 880, loss = 2.37 (11290.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:01.203291: step 890, loss = 2.40 (11902.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:02.070369: step 900, loss = 2.45 (11692.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:03.368961: step 910, loss = 2.42 (17195.1 examples/sec; 0.030 sec/batch)
2017-10-21 12:32:03.978102: step 920, loss = 2.40 (18536.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:32:04.850829: step 930, loss = 2.35 (11122.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:05.728512: step 940, loss = 2.34 (12271.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:06.609704: step 950, loss = 2.41 (12974.5 examples/sec; 0.039 sec/batch)
2017-10-21 12:32:07.493094: step 960, loss = 2.56 (11194.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:08.368741: step 970, loss = 2.32 (12206.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:09.225001: step 980, loss = 2.35 (11714.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:10.093379: step 990, loss = 2.35 (13891.7 examples/sec; 0.037 sec/batch)
2017-10-21 12:32:10.964604: step 1000, loss = 2.44 (11136.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:12.354888: step 1010, loss = 2.38 (17813.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:32:12.953107: step 1020, loss = 2.27 (18055.3 examples/sec; 0.028 sec/batch)
2017-10-21 12:32:13.812694: step 1030, loss = 2.36 (12088.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:14.690819: step 1040, loss = 2.40 (11652.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:15.578637: step 1050, loss = 2.25 (10602.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:32:16.447674: step 1060, loss = 2.23 (11491.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:17.334336: step 1070, loss = 2.21 (12663.5 examples/sec; 0.040 sec/batch)
2017-10-21 12:32:18.222834: step 1080, loss = 2.39 (11675.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:19.111262: step 1090, loss = 2.30 (11775.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:19.990807: step 1100, loss = 2.30 (11792.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:21.278193: step 1110, loss = 2.23 (17549.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:32:21.882903: step 1120, loss = 2.26 (14079.4 examples/sec; 0.036 sec/batch)
2017-10-21 12:32:22.767826: step 1130, loss = 2.30 (12114.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:23.637183: step 1140, loss = 2.10 (11542.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:24.515316: step 1150, loss = 2.11 (11318.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:25.398423: step 1160, loss = 2.10 (11478.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:26.286040: step 1170, loss = 2.11 (11385.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:27.177585: step 1180, loss = 2.28 (11527.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:28.050917: step 1190, loss = 2.05 (11983.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:28.935740: step 1200, loss = 2.06 (11311.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:30.192844: step 1210, loss = 2.10 (18194.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:32:30.804495: step 1220, loss = 2.11 (11860.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:31.681886: step 1230, loss = 1.98 (11936.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:32.537324: step 1240, loss = 2.03 (12131.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:33.420860: step 1250, loss = 2.14 (11111.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:34.306038: step 1260, loss = 2.00 (10503.6 examples/sec; 0.049 sec/batch)
2017-10-21 12:32:35.170587: step 1270, loss = 1.93 (11923.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:36.060723: step 1280, loss = 2.11 (11176.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:36.947428: step 1290, loss = 1.99 (10970.8 examples/sec; 0.047 sec/batch)
2017-10-21 12:32:37.811720: step 1300, loss = 2.04 (12017.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:39.075196: step 1310, loss = 1.82 (18196.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:32:39.677776: step 1320, loss = 1.93 (17862.0 examples/sec; 0.029 sec/batch)
2017-10-21 12:32:40.539517: step 1330, loss = 1.97 (11212.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:41.414027: step 1340, loss = 1.94 (12246.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:42.282865: step 1350, loss = 1.96 (11755.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:32:43.155724: step 1360, loss = 1.99 (12086.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:44.035763: step 1370, loss = 1.92 (10957.4 examples/sec; 0.047 sec/batch)
2017-10-21 12:32:44.894919: step 1380, loss = 1.91 (12346.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:32:45.764153: step 1390, loss = 1.88 (12402.8 examples/sec; 0.041 sec/batch)
2017-10-21 12:32:46.644129: step 1400, loss = 1.91 (11362.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:47.911023: step 1410, loss = 1.91 (18478.7 examples/sec; 0.028 sec/batch)
2017-10-21 12:32:48.519430: step 1420, loss = 1.93 (12057.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:49.382650: step 1430, loss = 1.95 (11447.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:50.258771: step 1440, loss = 1.82 (11923.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:51.132126: step 1450, loss = 1.89 (12627.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:32:52.013478: step 1460, loss = 1.96 (12202.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:52.896116: step 1470, loss = 1.88 (10600.4 examples/sec; 0.048 sec/batch)
2017-10-21 12:32:53.769528: step 1480, loss = 1.82 (13223.9 examples/sec; 0.039 sec/batch)
2017-10-21 12:32:54.644306: step 1490, loss = 1.82 (12023.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:32:55.526417: step 1500, loss = 1.76 (12274.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:32:56.784389: step 1510, loss = 1.78 (17902.9 examples/sec; 0.029 sec/batch)
2017-10-21 12:32:57.419729: step 1520, loss = 1.67 (11109.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:32:58.282811: step 1530, loss = 1.91 (11456.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:32:59.161573: step 1540, loss = 1.82 (12093.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:00.039645: step 1550, loss = 1.75 (11814.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:00.922327: step 1560, loss = 1.98 (11578.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:01.788345: step 1570, loss = 1.79 (12587.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:02.678426: step 1580, loss = 1.78 (11917.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:03.545326: step 1590, loss = 1.79 (11206.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:33:04.437968: step 1600, loss = 1.79 (10988.1 examples/sec; 0.047 sec/batch)
2017-10-21 12:33:05.699598: step 1610, loss = 1.79 (18367.5 examples/sec; 0.028 sec/batch)
2017-10-21 12:33:06.309600: step 1620, loss = 1.70 (15790.4 examples/sec; 0.032 sec/batch)
2017-10-21 12:33:07.203870: step 1630, loss = 1.75 (11824.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:08.076292: step 1640, loss = 1.68 (13165.2 examples/sec; 0.039 sec/batch)
2017-10-21 12:33:08.959397: step 1650, loss = 1.66 (11731.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:09.832182: step 1660, loss = 1.81 (11548.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:10.701956: step 1670, loss = 1.71 (11768.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:11.568780: step 1680, loss = 1.73 (12038.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:12.445785: step 1690, loss = 1.71 (11112.0 examples/sec; 0.046 sec/batch)
2017-10-21 12:33:13.341331: step 1700, loss = 1.68 (11228.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:33:14.612109: step 1710, loss = 1.72 (17478.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:33:15.213502: step 1720, loss = 1.64 (11532.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:16.091027: step 1730, loss = 1.90 (11787.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:16.970220: step 1740, loss = 1.67 (11000.4 examples/sec; 0.047 sec/batch)
2017-10-21 12:33:17.855496: step 1750, loss = 1.66 (10758.8 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:18.728295: step 1760, loss = 1.62 (10712.9 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:19.602718: step 1770, loss = 1.54 (11217.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:33:20.470026: step 1780, loss = 1.70 (11489.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:33:21.351199: step 1790, loss = 1.67 (10704.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:22.244150: step 1800, loss = 1.60 (11450.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:33:23.534287: step 1810, loss = 1.62 (15482.4 examples/sec; 0.033 sec/batch)
2017-10-21 12:33:24.134232: step 1820, loss = 1.59 (17368.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:33:24.978433: step 1830, loss = 1.62 (11768.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:25.855490: step 1840, loss = 1.74 (11866.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:26.729386: step 1850, loss = 1.61 (11751.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:27.598111: step 1860, loss = 1.64 (12464.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:28.484705: step 1870, loss = 1.54 (11696.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:29.361337: step 1880, loss = 1.53 (11873.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:30.246112: step 1890, loss = 1.57 (12521.9 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:31.126056: step 1900, loss = 1.49 (12847.6 examples/sec; 0.040 sec/batch)
2017-10-21 12:33:32.413215: step 1910, loss = 1.54 (17706.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:33:33.013459: step 1920, loss = 1.55 (17420.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:33:33.898329: step 1930, loss = 1.49 (11273.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:33:34.770716: step 1940, loss = 1.59 (11324.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:33:35.647257: step 1950, loss = 1.52 (11306.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:33:36.519814: step 1960, loss = 1.52 (12075.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:37.408679: step 1970, loss = 1.53 (11171.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:33:38.289415: step 1980, loss = 1.47 (11983.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:39.169667: step 1990, loss = 1.47 (11835.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:40.059845: step 2000, loss = 1.53 (11598.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:41.463960: step 2010, loss = 1.47 (16352.0 examples/sec; 0.031 sec/batch)
2017-10-21 12:33:42.060697: step 2020, loss = 1.54 (15763.4 examples/sec; 0.032 sec/batch)
2017-10-21 12:33:42.945429: step 2030, loss = 1.51 (11739.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:43.806788: step 2040, loss = 1.60 (12391.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:44.679777: step 2050, loss = 1.46 (13431.6 examples/sec; 0.038 sec/batch)
2017-10-21 12:33:45.580168: step 2060, loss = 1.40 (12548.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:46.463922: step 2070, loss = 1.39 (12326.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:47.358742: step 2080, loss = 1.47 (12563.6 examples/sec; 0.041 sec/batch)
2017-10-21 12:33:48.225236: step 2090, loss = 1.47 (12062.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:49.112915: step 2100, loss = 1.43 (10757.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:50.385281: step 2110, loss = 1.71 (16453.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:33:50.968643: step 2120, loss = 1.45 (17525.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:33:51.856641: step 2130, loss = 1.36 (10713.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:52.713852: step 2140, loss = 1.52 (12235.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:53.595359: step 2150, loss = 1.45 (10773.2 examples/sec; 0.048 sec/batch)
2017-10-21 12:33:54.472832: step 2160, loss = 1.43 (11704.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:33:55.368674: step 2170, loss = 1.38 (12188.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:56.256693: step 2180, loss = 1.39 (11962.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:33:57.120013: step 2190, loss = 1.33 (12273.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:33:58.017461: step 2200, loss = 1.42 (10886.3 examples/sec; 0.047 sec/batch)
2017-10-21 12:33:59.292503: step 2210, loss = 1.55 (18188.5 examples/sec; 0.028 sec/batch)
2017-10-21 12:33:59.908023: step 2220, loss = 1.44 (12530.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:00.815624: step 2230, loss = 1.45 (12246.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:01.688796: step 2240, loss = 1.42 (11776.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:02.565809: step 2250, loss = 1.42 (11687.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:03.428766: step 2260, loss = 1.35 (11834.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:04.322782: step 2270, loss = 1.31 (11408.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:05.196315: step 2280, loss = 1.39 (11277.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:06.064627: step 2290, loss = 1.34 (12166.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:06.933026: step 2300, loss = 1.37 (12072.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:08.212677: step 2310, loss = 1.36 (17753.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:34:08.826251: step 2320, loss = 1.32 (15377.5 examples/sec; 0.033 sec/batch)
2017-10-21 12:34:09.693530: step 2330, loss = 1.29 (11597.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:10.570414: step 2340, loss = 1.27 (11475.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:11.456657: step 2350, loss = 1.22 (11321.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:12.316967: step 2360, loss = 1.28 (11871.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:13.195515: step 2370, loss = 1.37 (12273.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:14.068540: step 2380, loss = 1.33 (11825.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:14.965204: step 2390, loss = 1.38 (11266.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:15.850618: step 2400, loss = 1.38 (11710.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:17.110193: step 2410, loss = 1.28 (17708.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:34:17.706954: step 2420, loss = 1.32 (17039.7 examples/sec; 0.030 sec/batch)
2017-10-21 12:34:18.596551: step 2430, loss = 1.32 (11278.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:19.459722: step 2440, loss = 1.33 (11965.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:20.335879: step 2450, loss = 1.39 (11421.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:21.220798: step 2460, loss = 1.23 (12178.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:22.095581: step 2470, loss = 1.22 (12068.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:22.964431: step 2480, loss = 1.34 (12503.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:23.831808: step 2490, loss = 1.25 (11365.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:24.732641: step 2500, loss = 1.45 (10007.1 examples/sec; 0.051 sec/batch)
2017-10-21 12:34:26.045491: step 2510, loss = 1.38 (16599.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:34:26.654304: step 2520, loss = 1.24 (13246.1 examples/sec; 0.039 sec/batch)
2017-10-21 12:34:27.536309: step 2530, loss = 1.39 (11943.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:28.415532: step 2540, loss = 1.19 (11421.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:29.290224: step 2550, loss = 1.48 (13327.3 examples/sec; 0.038 sec/batch)
2017-10-21 12:34:30.177378: step 2560, loss = 1.27 (11515.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:31.044073: step 2570, loss = 1.23 (11593.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:31.933295: step 2580, loss = 1.12 (11889.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:32.826345: step 2590, loss = 1.17 (11952.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:33.713255: step 2600, loss = 1.25 (11274.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:35.014781: step 2610, loss = 1.16 (18079.1 examples/sec; 0.028 sec/batch)
2017-10-21 12:34:35.618517: step 2620, loss = 1.25 (17438.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:34:36.506117: step 2630, loss = 1.36 (11843.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:37.380469: step 2640, loss = 1.34 (11651.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:38.255027: step 2650, loss = 1.21 (12228.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:39.122586: step 2660, loss = 1.15 (12223.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:39.984864: step 2670, loss = 1.37 (11348.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:40.862897: step 2680, loss = 1.26 (11207.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:34:41.723826: step 2690, loss = 1.31 (12094.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:42.602464: step 2700, loss = 1.14 (12458.8 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:43.887553: step 2710, loss = 1.25 (16598.3 examples/sec; 0.031 sec/batch)
2017-10-21 12:34:44.489775: step 2720, loss = 1.43 (17910.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:34:45.358912: step 2730, loss = 1.25 (11708.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:46.240853: step 2740, loss = 1.15 (11795.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:47.117491: step 2750, loss = 1.28 (12833.0 examples/sec; 0.040 sec/batch)
2017-10-21 12:34:47.974749: step 2760, loss = 1.36 (11841.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:34:48.833453: step 2770, loss = 1.17 (12674.0 examples/sec; 0.040 sec/batch)
2017-10-21 12:34:49.713830: step 2780, loss = 1.21 (11455.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:34:50.578681: step 2790, loss = 1.38 (12477.6 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:51.453735: step 2800, loss = 1.13 (11584.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:52.722754: step 2810, loss = 1.11 (18512.5 examples/sec; 0.028 sec/batch)
2017-10-21 12:34:53.342020: step 2820, loss = 1.28 (12530.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:54.234089: step 2830, loss = 1.19 (12576.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:34:55.114740: step 2840, loss = 1.22 (12179.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:55.995795: step 2850, loss = 1.09 (10986.8 examples/sec; 0.047 sec/batch)
2017-10-21 12:34:56.869909: step 2860, loss = 1.17 (11615.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:34:57.742599: step 2870, loss = 1.13 (12062.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:58.628643: step 2880, loss = 1.24 (12237.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:34:59.508937: step 2890, loss = 1.12 (10878.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:00.371963: step 2900, loss = 1.20 (11186.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:01.731101: step 2910, loss = 1.17 (17106.4 examples/sec; 0.030 sec/batch)
2017-10-21 12:35:02.332962: step 2920, loss = 1.18 (17641.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:35:03.204968: step 2930, loss = 1.17 (10497.5 examples/sec; 0.049 sec/batch)
2017-10-21 12:35:04.089249: step 2940, loss = 1.00 (11407.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:04.976165: step 2950, loss = 1.22 (10855.0 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:05.877815: step 2960, loss = 1.13 (10445.3 examples/sec; 0.049 sec/batch)
2017-10-21 12:35:06.751388: step 2970, loss = 1.08 (11582.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:07.638236: step 2980, loss = 1.13 (12099.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:08.522233: step 2990, loss = 1.14 (11280.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:09.422521: step 3000, loss = 1.12 (11088.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:10.786600: step 3010, loss = 1.10 (17285.0 examples/sec; 0.030 sec/batch)
2017-10-21 12:35:11.406617: step 3020, loss = 1.11 (15964.0 examples/sec; 0.032 sec/batch)
2017-10-21 12:35:12.278148: step 3030, loss = 1.11 (11021.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:13.149828: step 3040, loss = 1.02 (12932.4 examples/sec; 0.040 sec/batch)
2017-10-21 12:35:14.012552: step 3050, loss = 1.17 (12056.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:14.889065: step 3060, loss = 1.11 (12548.9 examples/sec; 0.041 sec/batch)
2017-10-21 12:35:15.765318: step 3070, loss = 1.26 (11200.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:16.644714: step 3080, loss = 1.02 (10972.6 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:17.510391: step 3090, loss = 1.15 (12252.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:18.383354: step 3100, loss = 1.03 (12257.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:19.673965: step 3110, loss = 1.09 (17883.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:35:20.266992: step 3120, loss = 1.05 (17916.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:35:21.125521: step 3130, loss = 1.01 (11786.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:35:22.017780: step 3140, loss = 1.13 (12442.3 examples/sec; 0.041 sec/batch)
2017-10-21 12:35:22.899684: step 3150, loss = 1.12 (11079.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:23.770139: step 3160, loss = 1.08 (12240.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:24.643591: step 3170, loss = 1.14 (11721.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:25.530501: step 3180, loss = 1.05 (11576.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:26.407032: step 3190, loss = 1.01 (11327.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:27.284473: step 3200, loss = 1.20 (11507.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:28.558429: step 3210, loss = 1.02 (18077.0 examples/sec; 0.028 sec/batch)
2017-10-21 12:35:29.173487: step 3220, loss = 1.02 (17348.9 examples/sec; 0.030 sec/batch)
2017-10-21 12:35:30.034879: step 3230, loss = 0.97 (11170.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:30.897999: step 3240, loss = 1.12 (11709.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:31.777706: step 3250, loss = 1.11 (11875.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:35:32.656641: step 3260, loss = 1.17 (11209.2 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:33.534422: step 3270, loss = 1.07 (11425.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:34.439441: step 3280, loss = 1.09 (10932.4 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:35.356210: step 3290, loss = 1.23 (11142.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:36.262055: step 3300, loss = 1.10 (11494.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:37.545106: step 3310, loss = 1.04 (17900.9 examples/sec; 0.029 sec/batch)
2017-10-21 12:35:38.141077: step 3320, loss = 1.05 (15360.4 examples/sec; 0.033 sec/batch)
2017-10-21 12:35:39.019359: step 3330, loss = 1.03 (11268.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:39.950093: step 3340, loss = 1.05 (13222.5 examples/sec; 0.039 sec/batch)
2017-10-21 12:35:40.872287: step 3350, loss = 0.92 (11021.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:41.775426: step 3360, loss = 0.98 (10401.5 examples/sec; 0.049 sec/batch)
2017-10-21 12:35:42.701302: step 3370, loss = 1.06 (11036.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:35:43.578799: step 3380, loss = 1.09 (12024.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:35:44.448376: step 3390, loss = 1.03 (11381.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:35:45.376922: step 3400, loss = 1.01 (11506.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:46.711357: step 3410, loss = 1.03 (18373.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:35:47.324924: step 3420, loss = 1.12 (13074.9 examples/sec; 0.039 sec/batch)
2017-10-21 12:35:48.246286: step 3430, loss = 1.04 (10792.0 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:49.139291: step 3440, loss = 1.07 (12430.3 examples/sec; 0.041 sec/batch)
2017-10-21 12:35:50.087779: step 3450, loss = 1.01 (10003.4 examples/sec; 0.051 sec/batch)
2017-10-21 12:35:50.995231: step 3460, loss = 1.00 (10901.7 examples/sec; 0.047 sec/batch)
2017-10-21 12:35:51.879584: step 3470, loss = 1.04 (12231.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:35:52.771971: step 3480, loss = 1.00 (11818.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:35:53.691146: step 3490, loss = 0.96 (10419.6 examples/sec; 0.049 sec/batch)
2017-10-21 12:35:54.639208: step 3500, loss = 1.05 (10517.8 examples/sec; 0.049 sec/batch)
2017-10-21 12:35:55.917838: step 3510, loss = 1.07 (18445.4 examples/sec; 0.028 sec/batch)
2017-10-21 12:35:56.509930: step 3520, loss = 1.09 (14993.4 examples/sec; 0.034 sec/batch)
2017-10-21 12:35:57.403307: step 3530, loss = 0.98 (11666.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:35:58.332314: step 3540, loss = 0.94 (10182.5 examples/sec; 0.050 sec/batch)
2017-10-21 12:35:59.283184: step 3550, loss = 1.09 (11477.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:00.161091: step 3560, loss = 0.89 (11477.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:01.139262: step 3570, loss = 1.01 (11266.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:02.043391: step 3580, loss = 1.04 (10884.2 examples/sec; 0.047 sec/batch)
2017-10-21 12:36:02.915350: step 3590, loss = 0.94 (11617.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:03.811404: step 3600, loss = 0.97 (10288.6 examples/sec; 0.050 sec/batch)
2017-10-21 12:36:05.102736: step 3610, loss = 1.07 (15297.7 examples/sec; 0.033 sec/batch)
2017-10-21 12:36:05.699331: step 3620, loss = 0.99 (15251.6 examples/sec; 0.034 sec/batch)
2017-10-21 12:36:06.617578: step 3630, loss = 1.05 (10538.0 examples/sec; 0.049 sec/batch)
2017-10-21 12:36:07.500309: step 3640, loss = 0.92 (12023.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:08.413381: step 3650, loss = 0.98 (12053.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:09.300408: step 3660, loss = 1.01 (11129.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:10.169313: step 3670, loss = 0.95 (11841.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:11.066603: step 3680, loss = 1.11 (10271.2 examples/sec; 0.050 sec/batch)
2017-10-21 12:36:11.942003: step 3690, loss = 0.96 (11350.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:12.820045: step 3700, loss = 0.95 (11812.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:14.088220: step 3710, loss = 1.10 (18265.9 examples/sec; 0.028 sec/batch)
2017-10-21 12:36:14.688580: step 3720, loss = 0.95 (13088.0 examples/sec; 0.039 sec/batch)
2017-10-21 12:36:15.546446: step 3730, loss = 0.99 (11652.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:16.426988: step 3740, loss = 0.99 (11165.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:17.294618: step 3750, loss = 0.98 (11219.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:18.171771: step 3760, loss = 0.96 (12282.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:19.040297: step 3770, loss = 1.06 (11933.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:19.921797: step 3780, loss = 0.92 (11316.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:20.808021: step 3790, loss = 0.95 (11437.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:21.665516: step 3800, loss = 1.11 (11778.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:22.938897: step 3810, loss = 1.02 (17624.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:36:23.571837: step 3820, loss = 0.95 (10531.5 examples/sec; 0.049 sec/batch)
2017-10-21 12:36:24.467169: step 3830, loss = 1.09 (10891.1 examples/sec; 0.047 sec/batch)
2017-10-21 12:36:25.346204: step 3840, loss = 0.87 (11005.1 examples/sec; 0.047 sec/batch)
2017-10-21 12:36:26.220849: step 3850, loss = 1.03 (11674.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:27.097869: step 3860, loss = 0.97 (11421.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:27.963450: step 3870, loss = 0.92 (12952.1 examples/sec; 0.040 sec/batch)
2017-10-21 12:36:28.828661: step 3880, loss = 0.92 (12359.3 examples/sec; 0.041 sec/batch)
2017-10-21 12:36:29.706627: step 3890, loss = 1.04 (11708.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:30.594541: step 3900, loss = 0.91 (11360.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:31.877976: step 3910, loss = 1.01 (16360.0 examples/sec; 0.031 sec/batch)
2017-10-21 12:36:32.464881: step 3920, loss = 0.96 (15732.3 examples/sec; 0.033 sec/batch)
2017-10-21 12:36:33.333005: step 3930, loss = 0.94 (11805.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:34.227516: step 3940, loss = 0.91 (11690.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:35.102593: step 3950, loss = 0.94 (11903.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:35.989164: step 3960, loss = 0.95 (11646.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:36.883593: step 3970, loss = 0.95 (11517.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:37.778012: step 3980, loss = 0.88 (11390.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:38.660972: step 3990, loss = 1.08 (12299.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:39.532102: step 4000, loss = 0.93 (11790.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:36:40.916690: step 4010, loss = 0.94 (18860.2 examples/sec; 0.027 sec/batch)
2017-10-21 12:36:41.530124: step 4020, loss = 0.98 (17497.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:36:42.404788: step 4030, loss = 1.00 (12194.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:43.304736: step 4040, loss = 0.97 (11251.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:44.201152: step 4050, loss = 0.87 (11551.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:45.078386: step 4060, loss = 0.94 (10690.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:36:45.955759: step 4070, loss = 1.08 (11345.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:36:46.845103: step 4080, loss = 0.94 (11730.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:47.728308: step 4090, loss = 0.98 (12459.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:36:48.608608: step 4100, loss = 0.90 (11170.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:49.895971: step 4110, loss = 0.91 (17562.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:36:50.503595: step 4120, loss = 0.94 (14195.9 examples/sec; 0.036 sec/batch)
2017-10-21 12:36:51.380044: step 4130, loss = 0.99 (12603.3 examples/sec; 0.041 sec/batch)
2017-10-21 12:36:52.268803: step 4140, loss = 0.94 (10847.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:36:53.162757: step 4150, loss = 0.97 (12077.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:54.056820: step 4160, loss = 0.98 (13428.8 examples/sec; 0.038 sec/batch)
2017-10-21 12:36:54.938969: step 4170, loss = 1.00 (12067.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:36:55.807378: step 4180, loss = 0.94 (11623.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:36:56.705351: step 4190, loss = 0.87 (11047.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:36:57.583414: step 4200, loss = 0.90 (12623.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:36:58.865181: step 4210, loss = 0.87 (18148.4 examples/sec; 0.028 sec/batch)
2017-10-21 12:36:59.461789: step 4220, loss = 0.92 (16552.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:37:00.349960: step 4230, loss = 0.88 (11464.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:01.269322: step 4240, loss = 0.85 (10669.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:37:02.151286: step 4250, loss = 0.88 (11629.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:03.027533: step 4260, loss = 0.97 (12482.4 examples/sec; 0.041 sec/batch)
2017-10-21 12:37:03.907963: step 4270, loss = 0.90 (11553.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:04.777372: step 4280, loss = 0.88 (11683.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:05.674638: step 4290, loss = 0.86 (10276.2 examples/sec; 0.050 sec/batch)
2017-10-21 12:37:06.557233: step 4300, loss = 0.95 (12143.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:07.829643: step 4310, loss = 0.97 (18155.8 examples/sec; 0.028 sec/batch)
2017-10-21 12:37:08.455241: step 4320, loss = 0.91 (18055.0 examples/sec; 0.028 sec/batch)
2017-10-21 12:37:09.335396: step 4330, loss = 1.13 (12335.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:10.206820: step 4340, loss = 0.91 (12610.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:37:11.092212: step 4350, loss = 0.96 (12025.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:37:11.975164: step 4360, loss = 0.98 (11600.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:12.852154: step 4370, loss = 0.94 (11618.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:13.727804: step 4380, loss = 0.89 (11649.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:14.614374: step 4390, loss = 0.93 (11913.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:37:15.482193: step 4400, loss = 0.83 (13182.1 examples/sec; 0.039 sec/batch)
2017-10-21 12:37:16.764195: step 4410, loss = 0.87 (18120.2 examples/sec; 0.028 sec/batch)
2017-10-21 12:37:17.367528: step 4420, loss = 0.81 (13103.4 examples/sec; 0.039 sec/batch)
2017-10-21 12:37:18.253642: step 4430, loss = 0.78 (11457.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:19.154860: step 4440, loss = 0.95 (9883.4 examples/sec; 0.052 sec/batch)
2017-10-21 12:37:20.023535: step 4450, loss = 0.82 (11373.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:20.927301: step 4460, loss = 0.93 (11693.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:21.804154: step 4470, loss = 0.93 (10788.9 examples/sec; 0.047 sec/batch)
2017-10-21 12:37:22.691103: step 4480, loss = 0.87 (11712.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:23.562887: step 4490, loss = 0.83 (12172.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:24.447522: step 4500, loss = 0.87 (11598.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:25.723170: step 4510, loss = 0.86 (17670.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:37:26.329305: step 4520, loss = 0.95 (17439.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:37:27.217124: step 4530, loss = 0.88 (10962.6 examples/sec; 0.047 sec/batch)
2017-10-21 12:37:28.095597: step 4540, loss = 1.02 (11589.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:29.005500: step 4550, loss = 0.82 (10627.6 examples/sec; 0.048 sec/batch)
2017-10-21 12:37:29.871741: step 4560, loss = 0.82 (13543.3 examples/sec; 0.038 sec/batch)
2017-10-21 12:37:30.748170: step 4570, loss = 0.86 (12026.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:37:31.637754: step 4580, loss = 0.76 (11331.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:32.525126: step 4590, loss = 1.02 (12159.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:33.413979: step 4600, loss = 0.89 (10266.2 examples/sec; 0.050 sec/batch)
2017-10-21 12:37:34.671424: step 4610, loss = 0.94 (17147.6 examples/sec; 0.030 sec/batch)
2017-10-21 12:37:35.277289: step 4620, loss = 0.87 (12669.1 examples/sec; 0.040 sec/batch)
2017-10-21 12:37:36.161875: step 4630, loss = 0.88 (11274.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:37.021390: step 4640, loss = 0.93 (11153.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:37:37.901223: step 4650, loss = 0.92 (11327.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:38.788003: step 4660, loss = 0.80 (11628.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:39.668937: step 4670, loss = 0.83 (11638.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:40.545210: step 4680, loss = 0.79 (12118.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:41.453611: step 4690, loss = 0.87 (11062.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:37:42.340349: step 4700, loss = 0.79 (12059.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:43.608644: step 4710, loss = 0.81 (17597.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:37:44.218510: step 4720, loss = 0.89 (17500.4 examples/sec; 0.029 sec/batch)
2017-10-21 12:37:45.094533: step 4730, loss = 0.87 (11546.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:45.979240: step 4740, loss = 0.93 (11644.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:46.870113: step 4750, loss = 0.89 (10677.2 examples/sec; 0.048 sec/batch)
2017-10-21 12:37:47.755738: step 4760, loss = 1.04 (12002.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:37:48.622448: step 4770, loss = 0.84 (11609.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:37:49.511385: step 4780, loss = 0.79 (11057.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:37:50.401712: step 4790, loss = 0.81 (11403.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:51.286104: step 4800, loss = 0.85 (12193.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:37:52.556709: step 4810, loss = 0.80 (18923.5 examples/sec; 0.027 sec/batch)
2017-10-21 12:37:53.150987: step 4820, loss = 0.85 (16457.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:37:54.039345: step 4830, loss = 0.76 (10979.3 examples/sec; 0.047 sec/batch)
2017-10-21 12:37:54.944147: step 4840, loss = 0.97 (10524.8 examples/sec; 0.049 sec/batch)
2017-10-21 12:37:55.811850: step 4850, loss = 0.86 (14340.4 examples/sec; 0.036 sec/batch)
2017-10-21 12:37:56.732446: step 4860, loss = 0.98 (10863.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:37:57.607919: step 4870, loss = 0.80 (11304.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:37:58.484965: step 4880, loss = 0.92 (11107.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:37:59.358418: step 4890, loss = 0.96 (12801.8 examples/sec; 0.040 sec/batch)
2017-10-21 12:38:00.240227: step 4900, loss = 0.85 (11957.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:01.528894: step 4910, loss = 0.79 (17809.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:38:02.154410: step 4920, loss = 0.92 (17939.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:38:03.036938: step 4930, loss = 1.00 (10858.1 examples/sec; 0.047 sec/batch)
2017-10-21 12:38:03.906134: step 4940, loss = 0.80 (12049.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:38:04.779518: step 4950, loss = 0.77 (11660.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:05.672505: step 4960, loss = 0.85 (11114.2 examples/sec; 0.046 sec/batch)
2017-10-21 12:38:06.582678: step 4970, loss = 0.92 (11532.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:07.447817: step 4980, loss = 0.90 (12895.8 examples/sec; 0.040 sec/batch)
2017-10-21 12:38:08.352250: step 4990, loss = 0.80 (12721.4 examples/sec; 0.040 sec/batch)
2017-10-21 12:38:09.217270: step 5000, loss = 0.90 (11566.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:10.600078: step 5010, loss = 0.74 (18956.6 examples/sec; 0.027 sec/batch)
2017-10-21 12:38:11.204561: step 5020, loss = 0.83 (14653.1 examples/sec; 0.035 sec/batch)
2017-10-21 12:38:12.075403: step 5030, loss = 0.86 (11687.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:12.948637: step 5040, loss = 0.86 (12382.9 examples/sec; 0.041 sec/batch)
2017-10-21 12:38:13.815093: step 5050, loss = 0.88 (11800.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:14.712504: step 5060, loss = 0.90 (11632.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:15.600452: step 5070, loss = 0.79 (11642.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:16.477170: step 5080, loss = 0.84 (10916.8 examples/sec; 0.047 sec/batch)
2017-10-21 12:38:17.347746: step 5090, loss = 0.91 (11703.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:18.212607: step 5100, loss = 0.81 (11826.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:19.503128: step 5110, loss = 0.78 (16250.1 examples/sec; 0.032 sec/batch)
2017-10-21 12:38:20.104966: step 5120, loss = 0.81 (17502.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:38:20.990071: step 5130, loss = 0.80 (12617.3 examples/sec; 0.041 sec/batch)
2017-10-21 12:38:21.860282: step 5140, loss = 0.92 (11566.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:22.735724: step 5150, loss = 0.83 (12354.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:38:23.621751: step 5160, loss = 0.90 (11331.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:24.487693: step 5170, loss = 0.75 (12107.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:38:25.367089: step 5180, loss = 0.86 (11862.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:26.240068: step 5190, loss = 0.82 (11401.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:27.140859: step 5200, loss = 0.85 (11492.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:28.428048: step 5210, loss = 0.85 (17552.5 examples/sec; 0.029 sec/batch)
2017-10-21 12:38:29.025738: step 5220, loss = 0.74 (17024.2 examples/sec; 0.030 sec/batch)
2017-10-21 12:38:29.910034: step 5230, loss = 0.78 (11315.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:30.778852: step 5240, loss = 0.96 (11554.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:31.648337: step 5250, loss = 0.80 (11893.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:32.516868: step 5260, loss = 0.86 (10778.4 examples/sec; 0.048 sec/batch)
2017-10-21 12:38:33.387450: step 5270, loss = 0.90 (11346.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:34.269817: step 5280, loss = 0.93 (11666.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:35.138105: step 5290, loss = 0.78 (12039.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:36.013427: step 5300, loss = 0.77 (11818.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:37.269548: step 5310, loss = 0.80 (18013.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:38:37.883436: step 5320, loss = 0.85 (16977.7 examples/sec; 0.030 sec/batch)
2017-10-21 12:38:38.745239: step 5330, loss = 0.75 (11377.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:39.626812: step 5340, loss = 0.79 (11542.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:40.518346: step 5350, loss = 0.82 (11950.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:41.390444: step 5360, loss = 0.79 (11377.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:42.262004: step 5370, loss = 0.80 (11802.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:43.149659: step 5380, loss = 0.82 (10558.5 examples/sec; 0.048 sec/batch)
2017-10-21 12:38:44.040893: step 5390, loss = 0.87 (11855.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:44.918157: step 5400, loss = 0.79 (12025.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:46.194311: step 5410, loss = 0.81 (17240.1 examples/sec; 0.030 sec/batch)
2017-10-21 12:38:46.794249: step 5420, loss = 0.74 (17760.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:38:47.674765: step 5430, loss = 0.83 (11181.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:38:48.549183: step 5440, loss = 0.82 (12029.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:49.417508: step 5450, loss = 0.79 (10992.6 examples/sec; 0.047 sec/batch)
2017-10-21 12:38:50.288596: step 5460, loss = 0.90 (11777.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:51.172011: step 5470, loss = 0.81 (10443.5 examples/sec; 0.049 sec/batch)
2017-10-21 12:38:52.065801: step 5480, loss = 0.76 (12087.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:38:52.928331: step 5490, loss = 0.86 (12565.4 examples/sec; 0.041 sec/batch)
2017-10-21 12:38:53.819134: step 5500, loss = 0.76 (11329.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:55.114817: step 5510, loss = 0.81 (16953.8 examples/sec; 0.030 sec/batch)
2017-10-21 12:38:55.715897: step 5520, loss = 0.79 (16966.1 examples/sec; 0.030 sec/batch)
2017-10-21 12:38:56.576897: step 5530, loss = 0.80 (11499.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:38:57.456879: step 5540, loss = 0.73 (11764.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:38:58.326520: step 5550, loss = 0.85 (11849.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:38:59.209284: step 5560, loss = 0.95 (11758.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:00.082173: step 5570, loss = 0.88 (11658.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:01.001954: step 5580, loss = 0.74 (12018.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:01.893979: step 5590, loss = 0.95 (11170.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:39:02.781100: step 5600, loss = 0.85 (11838.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:04.095806: step 5610, loss = 0.77 (17078.6 examples/sec; 0.030 sec/batch)
2017-10-21 12:39:04.742635: step 5620, loss = 0.81 (10734.5 examples/sec; 0.048 sec/batch)
2017-10-21 12:39:05.653436: step 5630, loss = 0.84 (12078.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:06.597640: step 5640, loss = 0.75 (11519.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:07.497663: step 5650, loss = 0.88 (11375.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:08.384051: step 5660, loss = 0.85 (11422.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:09.266804: step 5670, loss = 0.92 (11740.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:10.146097: step 5680, loss = 0.89 (11978.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:11.024620: step 5690, loss = 0.78 (12199.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:11.903092: step 5700, loss = 0.77 (11857.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:13.190842: step 5710, loss = 0.82 (17783.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:39:13.786097: step 5720, loss = 0.84 (16605.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:39:14.651778: step 5730, loss = 0.77 (12424.6 examples/sec; 0.041 sec/batch)
2017-10-21 12:39:15.535715: step 5740, loss = 0.88 (11909.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:16.409147: step 5750, loss = 0.76 (11263.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:17.293590: step 5760, loss = 0.89 (13032.4 examples/sec; 0.039 sec/batch)
2017-10-21 12:39:18.173938: step 5770, loss = 0.76 (12047.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:19.060621: step 5780, loss = 0.85 (11647.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:19.928796: step 5790, loss = 0.84 (11892.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:20.818417: step 5800, loss = 0.76 (11790.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:22.072275: step 5810, loss = 0.77 (17812.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:39:22.708865: step 5820, loss = 0.74 (11113.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:39:23.583680: step 5830, loss = 0.75 (12324.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:24.479355: step 5840, loss = 0.86 (11629.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:25.359551: step 5850, loss = 0.73 (11930.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:26.268733: step 5860, loss = 0.78 (11448.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:27.144921: step 5870, loss = 0.80 (11910.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:28.024290: step 5880, loss = 0.81 (11355.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:28.903681: step 5890, loss = 0.80 (11251.2 examples/sec; 0.046 sec/batch)
2017-10-21 12:39:29.769983: step 5900, loss = 0.73 (12310.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:31.057510: step 5910, loss = 0.75 (16894.4 examples/sec; 0.030 sec/batch)
2017-10-21 12:39:31.648653: step 5920, loss = 0.81 (17372.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:39:32.529219: step 5930, loss = 0.70 (11521.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:33.395905: step 5940, loss = 0.82 (12856.6 examples/sec; 0.040 sec/batch)
2017-10-21 12:39:34.290905: step 5950, loss = 0.75 (11150.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:39:35.179512: step 5960, loss = 0.73 (11629.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:36.050069: step 5970, loss = 0.85 (11314.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:36.923186: step 5980, loss = 0.74 (12010.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:37.801273: step 5990, loss = 0.72 (11341.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:38.683810: step 6000, loss = 0.97 (11408.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:40.112255: step 6010, loss = 0.72 (18095.2 examples/sec; 0.028 sec/batch)
2017-10-21 12:39:40.732861: step 6020, loss = 0.72 (13521.9 examples/sec; 0.038 sec/batch)
2017-10-21 12:39:41.617004: step 6030, loss = 0.79 (11168.8 examples/sec; 0.046 sec/batch)
2017-10-21 12:39:42.491095: step 6040, loss = 0.76 (11415.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:43.371363: step 6050, loss = 0.81 (11369.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:44.252480: step 6060, loss = 0.90 (10996.2 examples/sec; 0.047 sec/batch)
2017-10-21 12:39:45.118621: step 6070, loss = 0.75 (12239.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:45.977235: step 6080, loss = 0.84 (12043.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:39:46.857804: step 6090, loss = 0.77 (11255.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:47.732857: step 6100, loss = 0.84 (11440.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:49.001041: step 6110, loss = 0.72 (17773.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:39:49.600679: step 6120, loss = 0.76 (15364.8 examples/sec; 0.033 sec/batch)
2017-10-21 12:39:50.470658: step 6130, loss = 0.73 (11606.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:51.362167: step 6140, loss = 0.84 (11320.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:52.237964: step 6150, loss = 0.74 (11517.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:53.106023: step 6160, loss = 0.76 (12123.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:39:53.968868: step 6170, loss = 0.83 (12929.8 examples/sec; 0.040 sec/batch)
2017-10-21 12:39:54.846420: step 6180, loss = 0.81 (11461.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:39:55.721943: step 6190, loss = 0.74 (11641.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:39:56.604646: step 6200, loss = 0.80 (12473.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:39:57.883830: step 6210, loss = 0.64 (17240.2 examples/sec; 0.030 sec/batch)
2017-10-21 12:39:58.485403: step 6220, loss = 0.83 (17021.3 examples/sec; 0.030 sec/batch)
2017-10-21 12:39:59.349581: step 6230, loss = 0.80 (11914.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:00.223347: step 6240, loss = 0.74 (12700.3 examples/sec; 0.040 sec/batch)
2017-10-21 12:40:01.141136: step 6250, loss = 0.74 (11879.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:02.023885: step 6260, loss = 0.76 (11183.4 examples/sec; 0.046 sec/batch)
2017-10-21 12:40:02.896176: step 6270, loss = 0.78 (12338.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:40:03.766635: step 6280, loss = 0.77 (11996.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:04.655674: step 6290, loss = 0.77 (11183.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:40:05.535437: step 6300, loss = 0.76 (11501.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:06.813894: step 6310, loss = 0.78 (18029.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:40:07.410388: step 6320, loss = 0.83 (16942.9 examples/sec; 0.030 sec/batch)
2017-10-21 12:40:08.271271: step 6330, loss = 0.76 (11722.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:09.134250: step 6340, loss = 0.81 (12108.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:10.019697: step 6350, loss = 0.78 (12366.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:40:10.896542: step 6360, loss = 0.80 (11569.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:11.757699: step 6370, loss = 0.79 (11938.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:12.629901: step 6380, loss = 0.69 (12966.6 examples/sec; 0.039 sec/batch)
2017-10-21 12:40:13.512415: step 6390, loss = 0.73 (11272.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:14.381987: step 6400, loss = 0.72 (11554.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:15.656046: step 6410, loss = 0.73 (16640.9 examples/sec; 0.031 sec/batch)
2017-10-21 12:40:16.248335: step 6420, loss = 0.80 (17750.0 examples/sec; 0.029 sec/batch)
2017-10-21 12:40:17.138291: step 6430, loss = 0.87 (11337.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:18.035542: step 6440, loss = 0.80 (11962.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:18.904577: step 6450, loss = 0.68 (11481.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:19.793886: step 6460, loss = 0.86 (12062.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:20.669297: step 6470, loss = 0.85 (11362.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:21.549218: step 6480, loss = 0.76 (10694.8 examples/sec; 0.048 sec/batch)
2017-10-21 12:40:22.418858: step 6490, loss = 0.76 (13013.7 examples/sec; 0.039 sec/batch)
2017-10-21 12:40:23.306217: step 6500, loss = 0.79 (11480.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:24.605370: step 6510, loss = 0.78 (17723.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:40:25.210446: step 6520, loss = 0.80 (17019.9 examples/sec; 0.030 sec/batch)
2017-10-21 12:40:26.073627: step 6530, loss = 0.74 (11965.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:26.955098: step 6540, loss = 0.81 (11096.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:40:27.830245: step 6550, loss = 0.69 (10833.3 examples/sec; 0.047 sec/batch)
2017-10-21 12:40:28.713689: step 6560, loss = 0.82 (12165.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:29.595201: step 6570, loss = 0.75 (11929.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:30.471789: step 6580, loss = 0.83 (11657.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:31.344296: step 6590, loss = 0.82 (12614.2 examples/sec; 0.041 sec/batch)
2017-10-21 12:40:32.249921: step 6600, loss = 0.77 (10734.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:40:33.521787: step 6610, loss = 0.74 (16666.4 examples/sec; 0.031 sec/batch)
2017-10-21 12:40:34.117209: step 6620, loss = 0.68 (17970.0 examples/sec; 0.028 sec/batch)
2017-10-21 12:40:34.979211: step 6630, loss = 0.69 (11554.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:35.854278: step 6640, loss = 0.76 (10457.0 examples/sec; 0.049 sec/batch)
2017-10-21 12:40:36.729680: step 6650, loss = 0.83 (11009.1 examples/sec; 0.047 sec/batch)
2017-10-21 12:40:37.599825: step 6660, loss = 0.76 (11944.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:38.464260: step 6670, loss = 0.82 (11979.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:39.352403: step 6680, loss = 0.77 (10580.3 examples/sec; 0.048 sec/batch)
2017-10-21 12:40:40.227217: step 6690, loss = 0.89 (11479.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:41.109459: step 6700, loss = 0.72 (12363.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:40:42.476502: step 6710, loss = 0.72 (14776.2 examples/sec; 0.035 sec/batch)
2017-10-21 12:40:43.092183: step 6720, loss = 0.67 (17536.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:40:43.899054: step 6730, loss = 0.78 (11000.6 examples/sec; 0.047 sec/batch)
2017-10-21 12:40:44.784015: step 6740, loss = 0.82 (11749.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:45.667790: step 6750, loss = 0.75 (12020.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:46.541704: step 6760, loss = 0.85 (12335.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:47.408187: step 6770, loss = 0.69 (11523.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:48.285713: step 6780, loss = 0.73 (11600.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:49.149009: step 6790, loss = 0.85 (11533.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:50.035898: step 6800, loss = 0.75 (11697.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:40:51.312592: step 6810, loss = 0.73 (17628.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:40:51.915214: step 6820, loss = 0.75 (18189.4 examples/sec; 0.028 sec/batch)
2017-10-21 12:40:52.794897: step 6830, loss = 0.71 (11806.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:53.664323: step 6840, loss = 0.77 (11971.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:40:54.541299: step 6850, loss = 0.76 (11373.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:55.417336: step 6860, loss = 0.71 (12196.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:56.308893: step 6870, loss = 0.75 (10519.7 examples/sec; 0.049 sec/batch)
2017-10-21 12:40:57.197023: step 6880, loss = 0.78 (12189.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:40:58.084808: step 6890, loss = 0.68 (11483.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:40:58.957571: step 6900, loss = 0.79 (11472.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:00.233493: step 6910, loss = 0.87 (17634.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:41:00.856788: step 6920, loss = 0.79 (13765.0 examples/sec; 0.037 sec/batch)
2017-10-21 12:41:01.737118: step 6930, loss = 0.73 (10334.1 examples/sec; 0.050 sec/batch)
2017-10-21 12:41:02.623158: step 6940, loss = 0.74 (11543.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:03.520981: step 6950, loss = 0.71 (11558.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:04.394702: step 6960, loss = 0.71 (12302.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:41:05.268834: step 6970, loss = 0.86 (11567.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:06.140416: step 6980, loss = 0.71 (11759.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:07.015811: step 6990, loss = 0.78 (12586.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:41:07.889859: step 7000, loss = 0.67 (11120.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:09.269628: step 7010, loss = 0.91 (17779.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:41:09.856991: step 7020, loss = 0.61 (17248.5 examples/sec; 0.030 sec/batch)
2017-10-21 12:41:10.708020: step 7030, loss = 0.72 (11840.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:11.586212: step 7040, loss = 0.78 (11638.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:12.470230: step 7050, loss = 0.70 (11377.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:13.354320: step 7060, loss = 0.73 (12499.4 examples/sec; 0.041 sec/batch)
2017-10-21 12:41:14.228739: step 7070, loss = 0.76 (11838.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:15.104416: step 7080, loss = 0.84 (11850.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:15.968749: step 7090, loss = 0.74 (11222.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:16.852924: step 7100, loss = 0.64 (11718.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:18.144439: step 7110, loss = 0.72 (17115.0 examples/sec; 0.030 sec/batch)
2017-10-21 12:41:18.756856: step 7120, loss = 0.73 (16355.7 examples/sec; 0.031 sec/batch)
2017-10-21 12:41:19.583491: step 7130, loss = 0.72 (11552.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:20.472216: step 7140, loss = 0.68 (11347.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:21.339532: step 7150, loss = 0.60 (11904.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:22.243157: step 7160, loss = 0.67 (11834.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:23.132160: step 7170, loss = 0.74 (11852.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:24.018719: step 7180, loss = 0.75 (11056.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:24.887525: step 7190, loss = 0.74 (12119.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:41:25.766211: step 7200, loss = 0.68 (10880.0 examples/sec; 0.047 sec/batch)
2017-10-21 12:41:27.054319: step 7210, loss = 0.68 (18211.3 examples/sec; 0.028 sec/batch)
2017-10-21 12:41:27.654327: step 7220, loss = 0.71 (17349.1 examples/sec; 0.030 sec/batch)
2017-10-21 12:41:28.529416: step 7230, loss = 0.83 (11048.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:29.400057: step 7240, loss = 0.81 (11699.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:30.268241: step 7250, loss = 0.66 (11556.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:31.146127: step 7260, loss = 0.81 (12481.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:41:32.025369: step 7270, loss = 0.75 (12458.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:41:32.893709: step 7280, loss = 0.81 (11267.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:33.750114: step 7290, loss = 0.66 (11479.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:34.638323: step 7300, loss = 0.71 (11237.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:35.923139: step 7310, loss = 0.72 (16985.2 examples/sec; 0.030 sec/batch)
2017-10-21 12:41:36.530792: step 7320, loss = 0.70 (15797.9 examples/sec; 0.032 sec/batch)
2017-10-21 12:41:37.397076: step 7330, loss = 0.68 (11022.0 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:38.284814: step 7340, loss = 0.77 (11376.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:39.146511: step 7350, loss = 0.71 (11818.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:40.011170: step 7360, loss = 0.83 (12444.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:41:40.899676: step 7370, loss = 0.65 (10402.5 examples/sec; 0.049 sec/batch)
2017-10-21 12:41:41.774055: step 7380, loss = 0.75 (11545.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:42.655039: step 7390, loss = 0.72 (11331.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:43.527041: step 7400, loss = 0.64 (11274.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:44.867167: step 7410, loss = 0.72 (16689.9 examples/sec; 0.031 sec/batch)
2017-10-21 12:41:45.459806: step 7420, loss = 0.69 (16258.8 examples/sec; 0.031 sec/batch)
2017-10-21 12:41:46.316486: step 7430, loss = 0.80 (11999.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:47.198390: step 7440, loss = 0.69 (12035.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:48.069207: step 7450, loss = 0.82 (11992.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:48.952161: step 7460, loss = 0.77 (10885.8 examples/sec; 0.047 sec/batch)
2017-10-21 12:41:49.844292: step 7470, loss = 0.80 (12122.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:41:50.719345: step 7480, loss = 0.74 (11608.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:51.590926: step 7490, loss = 0.76 (12766.5 examples/sec; 0.040 sec/batch)
2017-10-21 12:41:52.470046: step 7500, loss = 0.71 (11510.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:53.736996: step 7510, loss = 0.73 (18137.5 examples/sec; 0.028 sec/batch)
2017-10-21 12:41:54.352634: step 7520, loss = 0.68 (14596.7 examples/sec; 0.035 sec/batch)
2017-10-21 12:41:55.238686: step 7530, loss = 0.70 (11042.2 examples/sec; 0.046 sec/batch)
2017-10-21 12:41:56.106659: step 7540, loss = 0.72 (11738.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:41:56.970804: step 7550, loss = 0.70 (11302.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:41:57.835583: step 7560, loss = 0.76 (12907.6 examples/sec; 0.040 sec/batch)
2017-10-21 12:41:58.700842: step 7570, loss = 0.68 (11903.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:41:59.580402: step 7580, loss = 0.69 (10788.9 examples/sec; 0.047 sec/batch)
2017-10-21 12:42:00.465444: step 7590, loss = 0.68 (10719.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:42:01.339826: step 7600, loss = 0.79 (10963.6 examples/sec; 0.047 sec/batch)
2017-10-21 12:42:02.613671: step 7610, loss = 0.69 (17358.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:03.224970: step 7620, loss = 0.74 (15898.5 examples/sec; 0.032 sec/batch)
2017-10-21 12:42:04.083755: step 7630, loss = 0.75 (11409.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:04.949622: step 7640, loss = 0.81 (11596.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:05.808659: step 7650, loss = 0.67 (11082.6 examples/sec; 0.046 sec/batch)
2017-10-21 12:42:06.685251: step 7660, loss = 0.68 (12193.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:07.550638: step 7670, loss = 0.71 (11152.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:42:08.427944: step 7680, loss = 0.76 (11586.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:09.298164: step 7690, loss = 0.64 (11878.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:10.172398: step 7700, loss = 0.70 (12529.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:11.454393: step 7710, loss = 0.83 (17808.0 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:12.072589: step 7720, loss = 0.72 (19143.7 examples/sec; 0.027 sec/batch)
2017-10-21 12:42:12.939304: step 7730, loss = 0.69 (11934.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:13.811466: step 7740, loss = 0.66 (12248.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:14.701055: step 7750, loss = 0.71 (13373.1 examples/sec; 0.038 sec/batch)
2017-10-21 12:42:15.572595: step 7760, loss = 0.75 (11302.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:16.444253: step 7770, loss = 0.67 (11372.4 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:17.327892: step 7780, loss = 0.63 (12778.6 examples/sec; 0.040 sec/batch)
2017-10-21 12:42:18.201964: step 7790, loss = 0.73 (11562.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:19.085767: step 7800, loss = 0.69 (11736.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:20.355260: step 7810, loss = 0.80 (17383.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:20.959087: step 7820, loss = 0.68 (16108.3 examples/sec; 0.032 sec/batch)
2017-10-21 12:42:21.847291: step 7830, loss = 0.74 (11726.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:22.722761: step 7840, loss = 0.69 (11738.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:23.587240: step 7850, loss = 0.78 (11718.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:24.469225: step 7860, loss = 0.72 (12449.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:25.339413: step 7870, loss = 0.73 (12867.1 examples/sec; 0.040 sec/batch)
2017-10-21 12:42:26.194735: step 7880, loss = 0.73 (12200.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:27.076677: step 7890, loss = 0.71 (11277.1 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:27.965047: step 7900, loss = 0.73 (12152.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:29.243647: step 7910, loss = 0.70 (17939.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:29.855417: step 7920, loss = 0.68 (16274.7 examples/sec; 0.031 sec/batch)
2017-10-21 12:42:30.710603: step 7930, loss = 0.74 (11969.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:31.592171: step 7940, loss = 0.79 (11815.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:32.471610: step 7950, loss = 0.67 (12382.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:33.348621: step 7960, loss = 0.59 (12108.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:34.229733: step 7970, loss = 0.80 (11233.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:42:35.100640: step 7980, loss = 0.71 (11801.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:35.978820: step 7990, loss = 0.69 (11617.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:36.850476: step 8000, loss = 0.79 (11468.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:38.315307: step 8010, loss = 0.65 (18333.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:42:38.916012: step 8020, loss = 0.72 (17618.4 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:39.777883: step 8030, loss = 0.74 (11033.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:42:40.686150: step 8040, loss = 0.75 (11596.3 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:41.568909: step 8050, loss = 0.71 (11646.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:42.447512: step 8060, loss = 0.76 (11723.9 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:43.326154: step 8070, loss = 0.81 (11553.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:44.200881: step 8080, loss = 0.71 (11559.8 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:45.093339: step 8090, loss = 0.63 (11294.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:45.956923: step 8100, loss = 0.63 (12043.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:47.232614: step 8110, loss = 0.78 (17014.0 examples/sec; 0.030 sec/batch)
2017-10-21 12:42:47.839123: step 8120, loss = 0.68 (14429.3 examples/sec; 0.035 sec/batch)
2017-10-21 12:42:48.713754: step 8130, loss = 0.80 (11473.3 examples/sec; 0.045 sec/batch)
2017-10-21 12:42:49.583477: step 8140, loss = 0.63 (11971.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:50.474617: step 8150, loss = 0.70 (12510.8 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:51.350792: step 8160, loss = 0.68 (12421.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:52.224585: step 8170, loss = 0.82 (12121.3 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:53.086467: step 8180, loss = 0.77 (12353.6 examples/sec; 0.041 sec/batch)
2017-10-21 12:42:53.969907: step 8190, loss = 0.64 (12080.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:42:54.842656: step 8200, loss = 0.76 (11603.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:42:56.128028: step 8210, loss = 0.62 (17651.9 examples/sec; 0.029 sec/batch)
2017-10-21 12:42:56.723603: step 8220, loss = 0.70 (17229.8 examples/sec; 0.030 sec/batch)
2017-10-21 12:42:57.588868: step 8230, loss = 0.65 (11839.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:42:58.466794: step 8240, loss = 0.71 (11081.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:42:59.337659: step 8250, loss = 0.70 (11465.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:00.210833: step 8260, loss = 0.74 (12455.9 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:01.117504: step 8270, loss = 0.73 (11405.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:01.997010: step 8280, loss = 0.71 (10673.2 examples/sec; 0.048 sec/batch)
2017-10-21 12:43:02.863013: step 8290, loss = 0.87 (12176.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:03.737876: step 8300, loss = 0.63 (11855.1 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:05.015006: step 8310, loss = 0.64 (18123.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:43:05.605639: step 8320, loss = 0.75 (17571.4 examples/sec; 0.029 sec/batch)
2017-10-21 12:43:06.466726: step 8330, loss = 0.72 (11634.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:07.341222: step 8340, loss = 0.62 (11388.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:08.212553: step 8350, loss = 0.68 (12414.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:09.090417: step 8360, loss = 0.71 (10956.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:43:09.969436: step 8370, loss = 0.74 (11228.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:43:10.855001: step 8380, loss = 0.77 (11534.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:11.733657: step 8390, loss = 0.76 (11828.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:12.605217: step 8400, loss = 0.69 (11811.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:13.883156: step 8410, loss = 0.65 (17857.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:43:14.482799: step 8420, loss = 0.69 (15061.5 examples/sec; 0.034 sec/batch)
2017-10-21 12:43:15.353779: step 8430, loss = 0.88 (12767.5 examples/sec; 0.040 sec/batch)
2017-10-21 12:43:16.234454: step 8440, loss = 0.68 (11816.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:17.108589: step 8450, loss = 0.75 (12652.4 examples/sec; 0.040 sec/batch)
2017-10-21 12:43:17.981279: step 8460, loss = 0.69 (12012.6 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:18.856837: step 8470, loss = 0.60 (11978.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:19.748379: step 8480, loss = 0.74 (12179.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:20.602513: step 8490, loss = 0.78 (13101.3 examples/sec; 0.039 sec/batch)
2017-10-21 12:43:21.504009: step 8500, loss = 0.81 (10765.2 examples/sec; 0.048 sec/batch)
2017-10-21 12:43:22.797922: step 8510, loss = 0.66 (18161.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:43:23.399077: step 8520, loss = 0.73 (15695.4 examples/sec; 0.033 sec/batch)
2017-10-21 12:43:24.255853: step 8530, loss = 0.84 (12196.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:25.139022: step 8540, loss = 0.66 (11557.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:26.011212: step 8550, loss = 0.64 (11447.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:26.900828: step 8560, loss = 0.70 (11625.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:27.766728: step 8570, loss = 0.76 (11589.0 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:28.624096: step 8580, loss = 0.73 (11903.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:29.504350: step 8590, loss = 0.78 (12494.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:30.377281: step 8600, loss = 0.70 (12496.2 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:31.678299: step 8610, loss = 0.83 (17423.3 examples/sec; 0.029 sec/batch)
2017-10-21 12:43:32.272625: step 8620, loss = 0.68 (17701.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:43:33.147212: step 8630, loss = 0.75 (11020.2 examples/sec; 0.046 sec/batch)
2017-10-21 12:43:34.018413: step 8640, loss = 0.79 (10877.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:43:34.902964: step 8650, loss = 0.75 (11657.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:35.770240: step 8660, loss = 0.89 (11589.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:36.650750: step 8670, loss = 0.66 (12446.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:37.527853: step 8680, loss = 0.64 (12441.1 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:38.407606: step 8690, loss = 0.68 (10939.7 examples/sec; 0.047 sec/batch)
2017-10-21 12:43:39.277756: step 8700, loss = 0.72 (12323.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:40.549837: step 8710, loss = 0.64 (19141.7 examples/sec; 0.027 sec/batch)
2017-10-21 12:43:41.159931: step 8720, loss = 0.69 (16448.8 examples/sec; 0.031 sec/batch)
2017-10-21 12:43:42.024268: step 8730, loss = 0.66 (12373.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:43:42.911087: step 8740, loss = 0.74 (12742.8 examples/sec; 0.040 sec/batch)
2017-10-21 12:43:43.793509: step 8750, loss = 0.70 (10994.2 examples/sec; 0.047 sec/batch)
2017-10-21 12:43:44.653065: step 8760, loss = 0.72 (11783.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:43:45.522270: step 8770, loss = 0.74 (11708.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:46.397766: step 8780, loss = 0.64 (11276.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:47.277254: step 8790, loss = 0.64 (13683.7 examples/sec; 0.037 sec/batch)
2017-10-21 12:43:48.160389: step 8800, loss = 0.71 (12173.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:49.440341: step 8810, loss = 0.71 (16711.5 examples/sec; 0.031 sec/batch)
2017-10-21 12:43:50.042024: step 8820, loss = 0.64 (16277.4 examples/sec; 0.031 sec/batch)
2017-10-21 12:43:50.915955: step 8830, loss = 0.67 (11270.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:51.805631: step 8840, loss = 0.76 (12212.7 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:52.676538: step 8850, loss = 0.70 (12172.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:43:53.562616: step 8860, loss = 0.64 (10638.2 examples/sec; 0.048 sec/batch)
2017-10-21 12:43:54.448460: step 8870, loss = 0.67 (11341.2 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:55.301687: step 8880, loss = 0.64 (11685.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:56.187932: step 8890, loss = 0.71 (11338.0 examples/sec; 0.045 sec/batch)
2017-10-21 12:43:57.056646: step 8900, loss = 0.69 (11625.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:43:58.319984: step 8910, loss = 0.86 (17909.1 examples/sec; 0.029 sec/batch)
2017-10-21 12:43:58.922077: step 8920, loss = 0.64 (13321.9 examples/sec; 0.038 sec/batch)
2017-10-21 12:43:59.805027: step 8930, loss = 0.67 (11567.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:00.717953: step 8940, loss = 0.61 (12662.3 examples/sec; 0.040 sec/batch)
2017-10-21 12:44:01.595711: step 8950, loss = 0.76 (12121.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:02.489603: step 8960, loss = 0.80 (12254.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:03.376400: step 8970, loss = 0.63 (11210.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:04.256292: step 8980, loss = 0.68 (11185.9 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:05.119581: step 8990, loss = 0.88 (11729.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:06.006945: step 9000, loss = 0.77 (10545.6 examples/sec; 0.049 sec/batch)
2017-10-21 12:44:07.420659: step 9010, loss = 0.68 (18624.2 examples/sec; 0.027 sec/batch)
2017-10-21 12:44:08.027871: step 9020, loss = 0.65 (13462.1 examples/sec; 0.038 sec/batch)
2017-10-21 12:44:08.904373: step 9030, loss = 0.73 (12196.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:09.796592: step 9040, loss = 0.67 (10964.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:44:10.669885: step 9050, loss = 0.62 (12294.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:11.564144: step 9060, loss = 0.65 (11977.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:12.441061: step 9070, loss = 0.64 (12202.4 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:13.327136: step 9080, loss = 0.67 (11662.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:14.199932: step 9090, loss = 0.75 (11669.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:15.069120: step 9100, loss = 0.64 (12029.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:16.340266: step 9110, loss = 0.67 (18949.3 examples/sec; 0.027 sec/batch)
2017-10-21 12:44:16.962321: step 9120, loss = 0.73 (16719.4 examples/sec; 0.031 sec/batch)
2017-10-21 12:44:17.839175: step 9130, loss = 0.67 (11158.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:18.722838: step 9140, loss = 0.63 (11904.0 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:19.604574: step 9150, loss = 0.71 (11958.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:20.490279: step 9160, loss = 0.64 (10888.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:44:21.378081: step 9170, loss = 0.74 (12149.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:22.246160: step 9180, loss = 0.56 (11231.1 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:23.111046: step 9190, loss = 0.63 (11957.9 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:24.004923: step 9200, loss = 0.76 (10122.7 examples/sec; 0.051 sec/batch)
2017-10-21 12:44:25.271614: step 9210, loss = 0.68 (16278.4 examples/sec; 0.031 sec/batch)
2017-10-21 12:44:25.869005: step 9220, loss = 0.69 (18173.4 examples/sec; 0.028 sec/batch)
2017-10-21 12:44:26.753820: step 9230, loss = 0.64 (12161.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:27.667324: step 9240, loss = 0.83 (10225.1 examples/sec; 0.050 sec/batch)
2017-10-21 12:44:28.623522: step 9250, loss = 0.65 (10956.9 examples/sec; 0.047 sec/batch)
2017-10-21 12:44:29.642439: step 9260, loss = 0.69 (10384.7 examples/sec; 0.049 sec/batch)
2017-10-21 12:44:30.638247: step 9270, loss = 0.71 (9847.8 examples/sec; 0.052 sec/batch)
2017-10-21 12:44:31.635201: step 9280, loss = 0.64 (10703.4 examples/sec; 0.048 sec/batch)
2017-10-21 12:44:32.627205: step 9290, loss = 0.69 (10195.5 examples/sec; 0.050 sec/batch)
2017-10-21 12:44:33.610968: step 9300, loss = 0.67 (9535.9 examples/sec; 0.054 sec/batch)
2017-10-21 12:44:34.880394: step 9310, loss = 0.60 (17155.7 examples/sec; 0.030 sec/batch)
2017-10-21 12:44:35.573366: step 9320, loss = 0.71 (12111.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:36.460548: step 9330, loss = 0.61 (12676.7 examples/sec; 0.040 sec/batch)
2017-10-21 12:44:37.341135: step 9340, loss = 0.76 (11121.7 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:38.221530: step 9350, loss = 0.73 (12881.6 examples/sec; 0.040 sec/batch)
2017-10-21 12:44:39.121454: step 9360, loss = 0.78 (12242.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:40.004480: step 9370, loss = 0.71 (11828.5 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:40.866319: step 9380, loss = 0.75 (11392.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:44:41.762528: step 9390, loss = 0.62 (12213.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:42.660796: step 9400, loss = 0.67 (10897.0 examples/sec; 0.047 sec/batch)
2017-10-21 12:44:43.945519: step 9410, loss = 0.61 (17371.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:44:44.554554: step 9420, loss = 0.68 (15999.7 examples/sec; 0.032 sec/batch)
2017-10-21 12:44:45.443186: step 9430, loss = 0.68 (11859.4 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:46.314230: step 9440, loss = 0.69 (12554.4 examples/sec; 0.041 sec/batch)
2017-10-21 12:44:47.193783: step 9450, loss = 0.68 (11748.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:48.084179: step 9460, loss = 0.66 (11814.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:48.968260: step 9470, loss = 0.71 (11279.5 examples/sec; 0.045 sec/batch)
2017-10-21 12:44:49.849057: step 9480, loss = 0.82 (12464.0 examples/sec; 0.041 sec/batch)
2017-10-21 12:44:50.727697: step 9490, loss = 0.62 (11847.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:44:51.616236: step 9500, loss = 0.68 (12166.8 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:52.909427: step 9510, loss = 0.68 (17415.2 examples/sec; 0.029 sec/batch)
2017-10-21 12:44:53.519675: step 9520, loss = 0.66 (17391.6 examples/sec; 0.029 sec/batch)
2017-10-21 12:44:54.397623: step 9530, loss = 0.69 (11345.9 examples/sec; 0.045 sec/batch)
2017-10-21 12:44:55.272407: step 9540, loss = 0.70 (11732.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:56.143175: step 9550, loss = 0.70 (11058.8 examples/sec; 0.046 sec/batch)
2017-10-21 12:44:57.031065: step 9560, loss = 0.68 (11518.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:44:57.919944: step 9570, loss = 0.65 (12213.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:44:58.803749: step 9580, loss = 0.64 (11303.7 examples/sec; 0.045 sec/batch)
2017-10-21 12:44:59.680515: step 9590, loss = 0.80 (12589.5 examples/sec; 0.041 sec/batch)
2017-10-21 12:45:00.602348: step 9600, loss = 0.63 (10539.4 examples/sec; 0.049 sec/batch)
2017-10-21 12:45:01.870054: step 9610, loss = 0.70 (17564.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:45:02.491226: step 9620, loss = 0.76 (13401.5 examples/sec; 0.038 sec/batch)
2017-10-21 12:45:03.372190: step 9630, loss = 0.70 (11148.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:45:04.237739: step 9640, loss = 0.67 (11464.8 examples/sec; 0.045 sec/batch)
2017-10-21 12:45:05.123733: step 9650, loss = 0.67 (11824.7 examples/sec; 0.043 sec/batch)
2017-10-21 12:45:06.018505: step 9660, loss = 0.61 (12109.2 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:06.891952: step 9670, loss = 0.65 (12128.9 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:07.764433: step 9680, loss = 0.64 (12079.0 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:08.626462: step 9690, loss = 0.66 (11611.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:09.494394: step 9700, loss = 0.75 (12879.7 examples/sec; 0.040 sec/batch)
2017-10-21 12:45:10.769939: step 9710, loss = 0.68 (17441.7 examples/sec; 0.029 sec/batch)
2017-10-21 12:45:11.367310: step 9720, loss = 0.74 (17350.0 examples/sec; 0.030 sec/batch)
2017-10-21 12:45:12.253270: step 9730, loss = 0.71 (10215.0 examples/sec; 0.050 sec/batch)
2017-10-21 12:45:13.138044: step 9740, loss = 0.77 (11743.1 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:14.026193: step 9750, loss = 0.63 (11816.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:45:14.914625: step 9760, loss = 0.61 (12152.6 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:15.796983: step 9770, loss = 0.73 (10888.5 examples/sec; 0.047 sec/batch)
2017-10-21 12:45:16.684787: step 9780, loss = 0.72 (11786.2 examples/sec; 0.043 sec/batch)
2017-10-21 12:45:17.573292: step 9790, loss = 0.66 (11574.2 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:18.467020: step 9800, loss = 0.77 (12324.5 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:19.727433: step 9810, loss = 0.61 (18098.6 examples/sec; 0.028 sec/batch)
2017-10-21 12:45:20.343462: step 9820, loss = 0.62 (12813.7 examples/sec; 0.040 sec/batch)
2017-10-21 12:45:21.223228: step 9830, loss = 0.62 (11649.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:22.144504: step 9840, loss = 0.73 (10979.3 examples/sec; 0.047 sec/batch)
2017-10-21 12:45:23.046621: step 9850, loss = 0.66 (11173.5 examples/sec; 0.046 sec/batch)
2017-10-21 12:45:23.928478: step 9860, loss = 0.60 (11909.3 examples/sec; 0.043 sec/batch)
2017-10-21 12:45:24.805849: step 9870, loss = 0.64 (12159.1 examples/sec; 0.042 sec/batch)
2017-10-21 12:45:25.661789: step 9880, loss = 0.74 (11760.5 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:26.527829: step 9890, loss = 0.66 (12378.7 examples/sec; 0.041 sec/batch)
2017-10-21 12:45:27.406908: step 9900, loss = 0.71 (11656.6 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:28.711197: step 9910, loss = 0.61 (17626.8 examples/sec; 0.029 sec/batch)
2017-10-21 12:45:29.315606: step 9920, loss = 0.72 (16662.7 examples/sec; 0.031 sec/batch)
2017-10-21 12:45:30.191506: step 9930, loss = 0.66 (11154.3 examples/sec; 0.046 sec/batch)
2017-10-21 12:45:31.052132: step 9940, loss = 0.70 (11445.6 examples/sec; 0.045 sec/batch)
2017-10-21 12:45:31.933559: step 9950, loss = 0.77 (11944.8 examples/sec; 0.043 sec/batch)
2017-10-21 12:45:32.814685: step 9960, loss = 0.70 (11700.4 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:33.707847: step 9970, loss = 0.54 (10704.0 examples/sec; 0.048 sec/batch)
2017-10-21 12:45:34.563367: step 9980, loss = 0.69 (11764.7 examples/sec; 0.044 sec/batch)
2017-10-21 12:45:35.442545: step 9990, loss = 0.67 (11469.2 examples/sec; 0.045 sec/batch)

5)评估训练结果

In [86]:
# --- Evaluation configuration ---
# Directory holding the training checkpoints that will be restored for eval.
CHECKPOINT_DIR='/mnt/data/software/models/tutorials/image/cifar10/train'
# Total number of test examples to evaluate (the full CIFAR-10 test split).
NUM_EXAMPLES=10000
# Number of examples fed through the graph per evaluation step.
BATCH_SIZE=256
# Directory where evaluation summaries (for TensorBoard) are written.
eval_dir='/tmp/cifar10_eval'
# If True, perform a single evaluation pass and stop; otherwise loop forever.
run_once=True
# Seconds to sleep between evaluation passes when looping.
eval_interval_secs=10
def eval_once(saver, summary_writer, top_k_op, summary_op):
  """Run Eval once.

  Restores the latest checkpoint from CHECKPOINT_DIR, drives the input
  queue runners to stream the evaluation set through the graph, counts
  top-1 correct predictions, and records precision@1 both to stdout and
  as a TensorBoard summary.

  Args:
    saver: Saver built over the moving-average shadow variables.
    summary_writer: Summary writer targeting eval_dir.
    top_k_op: Top K op (boolean vector, True where label is in top-1).
    summary_op: Summary op (merged summaries to evaluate and log).
  """
  with tf.Session() as sess:
    # Locate the most recent checkpoint written by the training loop.
    ckpt = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
    if ckpt and ckpt.model_checkpoint_path:
      # Restores from checkpoint
      saver.restore(sess, ckpt.model_checkpoint_path)
      # Assuming model_checkpoint_path looks something like:
      #   /my-favorite-path/cifar10_train/model.ckpt-0,
      # extract global_step from it.
      # NOTE(review): global_step is kept as a string here and later passed
      # to add_summary; an int(...) cast would be more conventional — confirm.
      global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
    else:
      print('No checkpoint file found')
      return

    # Start the queue runners.
    # Without these threads the input pipeline produces nothing and
    # sess.run(top_k_op) would block forever.
    coord = tf.train.Coordinator()
    try:
      threads = []
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                         start=True))

      # Enough batches to cover NUM_EXAMPLES (the last batch may overshoot,
      # so total_sample_count is num_iter * BATCH_SIZE, not NUM_EXAMPLES).
      num_iter = int(math.ceil(NUM_EXAMPLES / BATCH_SIZE))
      true_count = 0  # Counts the number of correct predictions.
      total_sample_count = num_iter * BATCH_SIZE
      step = 0
      while step < num_iter and not coord.should_stop():
        # predictions is a boolean array; summing it counts the hits.
        predictions = sess.run([top_k_op])
        true_count += np.sum(predictions)
        step += 1

      # Compute precision @ 1.
      precision = true_count / total_sample_count
      print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

      # Evaluate the merged summaries, then append the precision value
      # as an extra scalar before writing the event.
      summary = tf.Summary()
      summary.ParseFromString(sess.run(summary_op))
      summary.value.add(tag='Precision @ 1', simple_value=precision)
      summary_writer.add_summary(summary, global_step)
    except Exception as e:  # pylint: disable=broad-except
      # Propagate the failure to the coordinator so all threads stop.
      coord.request_stop(e)

    # Always stop and join the queue-runner threads before leaving.
    coord.request_stop()
    coord.join(threads, stop_grace_period_secs=10)
In [87]:
def evaluate():
  """Evaluate CIFAR-10.

  Builds an evaluation graph over the test split, wires up a Saver that
  restores the moving-average shadow variables, then calls eval_once —
  once if run_once is set, otherwise every eval_interval_secs seconds.
  """
  with tf.Graph().as_default() as graph:
    # Evaluation input pipeline (test split) feeding the inference model.
    eval_images, eval_labels = inputs(eval_data=True)
    logits = inference(eval_images)

    # Boolean vector: True where the true label is the top-1 prediction.
    top_k_op = tf.nn.in_top_k(logits, eval_labels, 1)

    # Restore the moving-average version of the learned variables, matching
    # what the training loop maintains via ExponentialMovingAverage.
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
    saver = tf.train.Saver(ema.variables_to_restore())

    # Merge every summary registered on this graph and point the writer
    # at the evaluation log directory.
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(eval_dir, graph)

    while True:
      eval_once(saver, summary_writer, top_k_op, summary_op)
      if run_once:
        break
      time.sleep(eval_interval_secs)
In [88]:
# Run the evaluation; with run_once=True this performs a single pass.
evaluate()
2017-10-21 12:57:07.241680: precision @ 1 = 0.835
In [ ]: