I created the following LSTM model and then tried to find the best batch size. Surprisingly, the model works very well for batch sizes of 2, 4, 8, 16, and 32, but with a batch size of 1 it suddenly gives disappointing results, as if it isn't learning anything. I also tried different learning rates, but that didn't help either. This is the model I am using:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, SGD, RMSprop

# hyperparameters
time_step = 32
train_data_percentage = 0.8
n_epochs = 10
n_batch = 1
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Data/data.csv")
# convert to numpy and scale
X = df.loc[:, 'b':'dm'].to_numpy()
Y = df.loc[:, 'dn'].to_numpy()
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
# reshape the data
X_dataset = np.zeros((X.shape[0] - time_step + 1, time_step, X.shape[1]))
Y_dataset = Y[time_step - 1:]
# filling X_dataset
for i in range(0, X_dataset.shape[0]):
    X_dataset[i] = X[i:i+time_step, :]
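# Alignment note: window X_dataset[i] covers rows i .. i+time_step-1 of X, so its
# label Y_dataset[i] = Y[i + time_step - 1] is the target at that window's last row.
# (illustrative sanity check)
assert X_dataset.shape[0] == Y_dataset.shape[0]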
# shuffle the data (shuffling X and Y together keeps windows and labels aligned)
X_dataset, Y_dataset = shuffle(X_dataset, Y_dataset, random_state=42)
# splitting (train, validation)
train_data_len = int(np.ceil(train_data_percentage * X_dataset.shape[0]))
X_train = X_dataset[:train_data_len]
X_valid = X_dataset[train_data_len:]
Y_train = Y_dataset[:train_data_len]
Y_valid = Y_dataset[train_data_len:]
# defining the model
def Create_LSTM_Model(unit_per_layer=1000, drop_out=0.5, optimizer='Adam', lr=1e-3, reg=0.01):
    model = Sequential()
    model.add(LSTM(units=unit_per_layer, input_shape=(time_step, X_dataset.shape[2]), return_sequences=True, kernel_regularizer=l2(reg)))
    model.add(Dropout(drop_out))
    model.add(LSTM(units=unit_per_layer, kernel_regularizer=l2(reg)))
    model.add(Dropout(drop_out))
    model.add(Dense(units=unit_per_layer, activation='tanh', kernel_regularizer=l2(reg)))
    model.add(Dropout(drop_out))
    model.add(Dense(units=1, activation='sigmoid'))  # sigmoid outputs a probability in [0, 1], as expected by binary_crossentropy
    if optimizer.upper() == 'ADAM':
        opti_func = Adam(learning_rate=lr, amsgrad=True)
    elif optimizer.upper() == 'SGD':
        opti_func = SGD(learning_rate=lr)
    elif optimizer.upper() == 'RMSPROP':
        opti_func = RMSprop(learning_rate=lr)
    else:
        raise ValueError(f"Unsupported optimizer: {optimizer}")
    model.compile(optimizer=opti_func, loss='binary_crossentropy', metrics=['binary_accuracy'])
    return model
# create the model
model = Create_LSTM_Model(unit_per_layer=600, drop_out=0.5, optimizer = 'adam', lr=5e-4, reg=0.01)
# fit the model
history = model.fit(X_train, Y_train, epochs=n_epochs, batch_size=n_batch, validation_data=(X_valid, Y_valid))
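A batch-size comparison like the one described at the top can be run by re-fitting the same architecture in a loop, for example (a minimal sketch reusing Create_LSTM_Model and the arrays defined above; the list of batch sizes and the reported metric are illustrative):
# Sketch: compare batch sizes by re-creating and re-fitting the same model,
# then record the final validation accuracy of each run.
results = {}
for bs in [1, 2, 4, 8, 16, 32]:
    m = Create_LSTM_Model(unit_per_layer=600, drop_out=0.5, optimizer='adam', lr=5e-4, reg=0.01)
    h = m.fit(X_train, Y_train, epochs=n_epochs, batch_size=bs,
              validation_data=(X_valid, Y_valid), verbose=0)
    results[bs] = h.history['val_binary_accuracy'][-1]
print(results)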
This is the result with a batch size of, for example, 2 after only 10 epochs:
and this is the disappointing result with a batch size of 1 after 100 epochs:
question from:
https://stackoverflow.com/questions/65837847/why-does-my-lstm-model-work-for-any-batch-size-other-than-1