I'm starting to study deep learning and I'm practicing softmax classification with the MNIST dataset.
It worked fine when I used the sigmoid activation function, but the cost does not decrease when I use the ReLU function.
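For comparison, the sigmoid version that trained fine used the same layers with only the activation swapped, roughly like this (a sketch from memory; images, W1, b1, etc. are the same variables defined in the full code below):
layer1 = tf.sigmoid(tf.matmul(images, W1) + b1)
layer2 = tf.sigmoid(tf.matmul(layer1, W2) + b2)
layer3 = tf.sigmoid(tf.matmul(layer2, W3) + b3)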
Below is my full code.
from tensorflow import keras
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Load MNIST and one-hot encode the labels
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data(path="mnist.npz")
train_labels = tf.one_hot(train_labels, 10)
test_labels = tf.one_hot(test_labels, 10)
# Scale pixels to [0, 1] and flatten each 28x28 image into a 784-vector
train_images, test_images = train_images / 255.0, test_images / 255.0
train_images = tf.reshape(train_images, (len(train_images), 28*28))
test_images = tf.reshape(test_images, (len(test_images), 28*28))
train_images = tf.cast(train_images, tf.float32)
test_images = tf.cast(test_images, tf.float32)
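# Weights and biases for a 784 -> 50 -> 50 -> 50 -> 10 network,
# all initialized from a standard normal distribution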
W1 = tf.Variable(tf.random.normal([28*28,50]), name='weight1', dtype=tf.float32)
b1 = tf.Variable(tf.random.normal([50]), name='bias1', dtype=tf.float32)
W2 = tf.Variable(tf.random.normal([50, 50]), name='weight2', dtype=tf.float32)
b2 = tf.Variable(tf.random.normal([50]), name='bias2', dtype=tf.float32)
W3 = tf.Variable(tf.random.normal([50,50]), name='weight3', dtype=tf.float32)
b3 = tf.Variable(tf.random.normal([50]), name='bias3', dtype=tf.float32)
W4 = tf.Variable(tf.random.normal([50,10]), name='weight4', dtype=tf.float32)
b4 = tf.Variable(tf.random.normal([10]), name='bias4', dtype=tf.float32)
# Mini-batches of 100 examples
train_ds = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)).batch(100)
epochs = 10
learning_rate = 0.1
cost_list = []
accuracy_list = []
for epoch in range(epochs):
    avg_cost = 0
    for images, labels in train_ds:
        # Forward pass: three ReLU hidden layers and a softmax output
        with tf.GradientTape() as tape:
            layer1 = tf.nn.relu(tf.matmul(images, W1) + b1)
            layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
            layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)
            hypothesis = tf.nn.softmax(tf.matmul(layer3, W4) + b4)
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=labels))
        predicted = tf.math.argmax(hypothesis, 1)
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, tf.math.argmax(labels, 1)), dtype=tf.float32))
        # Manual gradient-descent update of every weight and bias
        W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad = tape.gradient(cost, [W1, b1, W2, b2, W3, b3, W4, b4])
        W1.assign_sub(learning_rate * W1_grad)
        b1.assign_sub(learning_rate * b1_grad)
        W2.assign_sub(learning_rate * W2_grad)
        b2.assign_sub(learning_rate * b2_grad)
        W3.assign_sub(learning_rate * W3_grad)
        b3.assign_sub(learning_rate * b3_grad)
        W4.assign_sub(learning_rate * W4_grad)
        b4.assign_sub(learning_rate * b4_grad)
        avg_cost += cost / len(train_ds)
    cost_list.append(avg_cost)
    accuracy_list.append(accuracy.numpy())
    print("Epoch: {}, cost: {}, accuracy: {}".format(epoch, avg_cost, accuracy.numpy()))
plt.plot(range(epochs), cost_list)
plt.show()
plt.plot(range(epochs), accuracy_list)
plt.show()
This is my result!
As you can see, the cost value does not decrease and the accuracy keeps fluctuating.
Please help me!