This is a classification model for ten categories of pictures. My code has three files: convNet.py (the CNN model), read_TFRecord.py (reads the data), and train.py (trains and evaluates the model). The training set has 80,000 samples and the validation set has 20,000 samples.
Question:
In the first epoch:
training loss = 2.11, training accuracy = 25.61%
validation loss = 3.05, validation accuracy = 8.29%
Why is the validation loss so different from the training loss right from the start? And why does the validation accuracy stay below 10%?
Over 10 epochs of training:
The training metrics keep improving normally, but the validation loss increases slowly and the validation accuracy keeps oscillating around 10%. Is this over-fitting? I have already taken some measures against it, such as adding L2 regularization losses and dropout, but I do not know where the problem is. I hope you can help me.
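For reference, a model that guesses uniformly over ten classes sits at 10% accuracy with a cross-entropy loss of ln(10) ≈ 2.30, so my validation numbers look close to chance:

import math
# uniform guessing over 10 classes: cross-entropy = -log(1/10) = log(10)
print(math.log(10))  # 2.302..., roughly where a random-guessing model sits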
convNet.py:
import tensorflow as tf
from tensorflow.contrib import learn


def convNet(features, mode):
    input_layer = tf.reshape(features, [-1, 100, 100, 3])
    tf.summary.image('input', input_layer)
    # conv1
    with tf.name_scope('conv1'):
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=32,
            kernel_size=5,
            padding="same",
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            name='conv1'
        )
        conv1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv1')
        tf.summary.histogram('kernel', conv1_vars[0])
        tf.summary.histogram('bias', conv1_vars[1])
        tf.summary.histogram('act', conv1)
    # pool1 100->50
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2, name='pool1')
    # dropout
    pool1_dropout = tf.layers.dropout(
        inputs=pool1, rate=0.5, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='pool1_dropout')
    # conv2
    with tf.name_scope('conv2'):
        conv2 = tf.layers.conv2d(
            inputs=pool1_dropout,
            filters=64,
            kernel_size=5,
            padding="same",
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            name='conv2'
        )
        conv2_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv2')
        tf.summary.histogram('kernel', conv2_vars[0])
        tf.summary.histogram('bias', conv2_vars[1])
        tf.summary.histogram('act', conv2)
    # pool2 50->25
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2, name='pool2')
    # dropout
    pool2_dropout = tf.layers.dropout(
        inputs=pool2, rate=0.5, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='pool2_dropout')
    # conv3
    with tf.name_scope('conv3'):
        conv3 = tf.layers.conv2d(
            inputs=pool2_dropout,
            filters=128,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            name='conv3'
        )
        conv3_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv3')
        tf.summary.histogram('kernel', conv3_vars[0])
        tf.summary.histogram('bias', conv3_vars[1])
        tf.summary.histogram('act', conv3)
    # pool3 25->12
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2, name='pool3')
    # dropout
    pool3_dropout = tf.layers.dropout(
        inputs=pool3, rate=0.5, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='pool3_dropout')
    # conv4
    with tf.name_scope('conv4'):
        conv4 = tf.layers.conv2d(
            inputs=pool3_dropout,
            filters=128,
            kernel_size=3,
            padding="same",
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            name='conv4'
        )
        conv4_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'conv4')
        tf.summary.histogram('kernel', conv4_vars[0])
        tf.summary.histogram('bias', conv4_vars[1])
        tf.summary.histogram('act', conv4)
    # pool4 12->6
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2, name='pool4')
    # dropout
    pool4_dropout = tf.layers.dropout(
        inputs=pool4, rate=0.5, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='pool4_dropout')
    pool4_flat = tf.reshape(pool4_dropout, [-1, 6 * 6 * 128])
    # fc1
    with tf.name_scope('fc1'):
        fc1 = tf.layers.dense(inputs=pool4_flat, units=1024, activation=tf.nn.relu,
                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                              kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                              name='fc1')
        fc1_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'fc1')
        tf.summary.histogram('kernel', fc1_vars[0])
        tf.summary.histogram('bias', fc1_vars[1])
        tf.summary.histogram('act', fc1)
    # dropout
    fc1_dropout = tf.layers.dropout(
        inputs=fc1, rate=0.3, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='fc1_dropout')
    # fc2
    with tf.name_scope('fc2'):
        fc2 = tf.layers.dense(inputs=fc1_dropout, units=512, activation=tf.nn.relu,
                              kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                              kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                              name='fc2')
        fc2_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'fc2')
        tf.summary.histogram('kernel', fc2_vars[0])
        tf.summary.histogram('bias', fc2_vars[1])
        tf.summary.histogram('act', fc2)
    # dropout
    fc2_dropout = tf.layers.dropout(
        inputs=fc2, rate=0.3, training=tf.equal(mode, learn.ModeKeys.TRAIN),
        name='fc2_dropout')
    # logits
    with tf.name_scope('out'):
        logits = tf.layers.dense(inputs=fc2_dropout, units=10, activation=None,
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                                 kernel_regularizer=tf.contrib.layers.l2_regularizer(0.01),
                                 name='out')
        out_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'out')
        tf.summary.histogram('kernel', out_vars[0])
        tf.summary.histogram('bias', out_vars[1])
        tf.summary.histogram('act', logits)
    return logits
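To rule out the dropout switch itself, here is a minimal standalone check (my own sketch, separate from the model code) that the string `mode` placeholder really disables dropout at evaluation time:

import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn

mode = tf.placeholder(tf.string, name='mode')
drop = tf.layers.dropout(tf.ones([1, 8]), rate=0.5,
                         training=tf.equal(mode, learn.ModeKeys.TRAIN))
with tf.Session() as sess:
    out = sess.run(drop, feed_dict={mode: learn.ModeKeys.EVAL})
    print(np.array_equal(out, np.ones((1, 8))))  # True -> dropout off at eval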
read_TFRecord.py:
import tensorflow as tf


def read_and_decode(filename, width, height, channel):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })
    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, [width, height, channel])
    img = tf.cast(img, tf.float16) * (1. / 255) - 0.5  # scale to [-0.5, 0.5]
    label = tf.cast(features['label'], tf.int16)
    return img, label
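For context, the records were written along these lines in my conversion step (a simplified sketch; the `image` array and `label` below are stand-ins for the real data), so `tf.decode_raw` should get back exactly width * height * channel uint8 bytes per record:

import numpy as np
import tensorflow as tf

image = np.zeros((100, 100, 3), dtype=np.uint8)  # stand-in for one real image
label = 0                                        # stand-in for its class index
with tf.python_io.TFRecordWriter('example.tfrecords') as writer:
    example = tf.train.Example(features=tf.train.Features(feature={
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image.tobytes()])),
    }))
    writer.write(example.SerializeToString())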
train.py:
import tensorflow as tf
from tensorflow.contrib import learn

from convNet import convNet
from read_TFRecord import read_and_decode

# step 1
TRAIN_TFRECORD = 'F:/10-image-set2/train.tfrecords'  # training data set
VAL_TFRECORD = 'F:/10-image-set2/val.tfrecords'  # validation data set
WIDTH = 100  # image width
HEIGHT = 100  # image height
CHANNEL = 3  # image channels
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 16
train_img, train_label = read_and_decode(TRAIN_TFRECORD, WIDTH, HEIGHT, CHANNEL)
val_img, val_label = read_and_decode(VAL_TFRECORD, WIDTH, HEIGHT, CHANNEL)
x_train_batch, y_train_batch = tf.train.shuffle_batch(
    [train_img, train_label], batch_size=TRAIN_BATCH_SIZE,
    capacity=80000, min_after_dequeue=79999,
    num_threads=64, name='train_shuffle_batch')
x_val_batch, y_val_batch = tf.train.shuffle_batch(
    [val_img, val_label], batch_size=VAL_BATCH_SIZE,
    capacity=20000, min_after_dequeue=19999,
    num_threads=64, name='val_shuffle_batch')
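# NOTE: min_after_dequeue is set to capacity - 1 above, so each queue must
# buffer nearly the whole data set before producing its first batch, which
# costs a lot of memory and startup time. A smaller buffer still shuffles
# adequately; a sketch with illustrative numbers:
#   tf.train.shuffle_batch([train_img, train_label],
#                          batch_size=TRAIN_BATCH_SIZE,
#                          capacity=10000, min_after_dequeue=5000,
#                          num_threads=4, name='train_shuffle_batch')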
# step 2
x = tf.placeholder(tf.float32, shape=[None, WIDTH, HEIGHT, CHANNEL], name='x')
y_ = tf.placeholder(tf.int32, shape=[None], name='y_')
mode = tf.placeholder(tf.string, name='mode')
step = tf.get_variable(shape=(), dtype=tf.int32,
                       initializer=tf.zeros_initializer(), name='step')
tf.add_to_collection(tf.GraphKeys.GLOBAL_STEP, step)
logits = convNet(x, mode)
with tf.name_scope('Reg_losses'):
    # only add the L2 regularization terms to the loss in training mode
    reg_losses = tf.cond(tf.equal(mode, learn.ModeKeys.TRAIN),
                         lambda: tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
                         lambda: tf.constant(0, dtype=tf.float32))
with tf.name_scope('Loss'):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=logits) + reg_losses
train_op = tf.train.AdamOptimizer().minimize(loss, global_step=step)
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
with tf.name_scope('Accuracy'):
    acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# step 3
tf.summary.scalar("reg_losses", reg_losses)
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", acc)
merged = tf.summary.merge_all()
# step 4
with tf.Session() as sess:
    summary_dir = './logs/summary/'
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(max_to_keep=1)  # keep only one checkpoint
    train_writer = tf.summary.FileWriter(summary_dir + 'train', sess.graph)
    valid_writer = tf.summary.FileWriter(summary_dir + 'valid')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    max_acc = 0
    MAX_EPOCH = 10
    for epoch in range(MAX_EPOCH):
        # training
        train_step = int(80000 / TRAIN_BATCH_SIZE)  # batches per training epoch
        train_loss, train_acc = 0, 0
        for i in range(epoch * train_step, (epoch + 1) * train_step):
            x_train, y_train = sess.run([x_train_batch, y_train_batch])
            # the optimizer increments the global step itself, so it is not fed
            train_summary, _, err, ac = sess.run(
                [merged, train_op, loss, acc],
                feed_dict={x: x_train, y_: y_train, mode: learn.ModeKeys.TRAIN})
            train_loss += err
            train_acc += ac
            if (i + 1) % 50 == 0:
                train_writer.add_summary(train_summary, i)
        print("Epoch %d, train loss = %.2f, train accuracy = %.2f%%" % (
            epoch, train_loss / train_step, train_acc / train_step * 100.0))
        # validation
        val_step = int(20000 / VAL_BATCH_SIZE)  # batches per validation pass
        val_loss, val_acc = 0, 0
        for i in range(epoch * val_step, (epoch + 1) * val_step):
            x_val, y_val = sess.run([x_val_batch, y_val_batch])
            val_summary, err, ac = sess.run(
                [merged, loss, acc],
                feed_dict={x: x_val, y_: y_val, mode: learn.ModeKeys.EVAL})
            val_loss += err
            val_acc += ac
            if (i + 1) % 50 == 0:
                valid_writer.add_summary(val_summary, i)
        print("Epoch %d, validation loss = %.2f, validation accuracy = %.2f%%" % (
            epoch, val_loss / val_step, val_acc / val_step * 100.0))