Neural network output shape mismatch
So I'm building my first simple DQN neural network, but I'm really struggling with the output shape of my network.
My input has 139 features, which gives input_shape=(None, 139), and I use a batch size of 64. The last layer has 4 outputs, since my environment has 4 possible actions (0, 1, 2, 3).
But I get the following error:
ValueError: Error when checking target: expected dense_4 to have shape (None, 1) but got array with shape (1, 4)
This is driving me crazy. What did I do wrong?
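For reference, these are the shapes a single transition carries in my code: one scaled state of shape (1, 139) and one row of 4 Q-values of shape (1, 4), which matches the (1, 4) array the error mentions. A standalone sketch with dummy data (random placeholders, not my real preprocessed market data):

import numpy as np

# Dummy arrays with the same shapes my agent passes around
# (placeholder values only, not my real data):
state = np.random.rand(1, 139)       # one scaled observation: (1, 139)
target_qs = np.random.rand(1, 4)     # one row of Q-values, the (1, 4) array from the error
print(state.shape, target_qs.shape)  # (1, 139) (1, 4)

My create_model() is below.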
def create_model(self):
    model = Sequential()
    model.add(Dense(128, input_shape=(None, 139), activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(4, activation='softmax'))

    # Model compile settings:
    opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

    # Compile model
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=opt,
        metrics=['accuracy']
    )
    print(model.summary())
    return model
Model summary:
Model: "sequential_7"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_23 (Dense) (None, None, 128) 17920
_________________________________________________________________
dropout_19 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_24 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_20 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_25 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_21 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_26 (Dense) (None, None, 4) 516
=================================================================
Total params: 51,460
Trainable params: 51,460
Non-trainable params: 0
_________________________________________________________________
None
Model: "sequential_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_27 (Dense) (None, None, 128) 17920
_________________________________________________________________
dropout_22 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_28 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_23 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_29 (Dense) (None, None, 128) 16512
_________________________________________________________________
dropout_24 (Dropout) (None, None, 128) 0
_________________________________________________________________
dense_30 (Dense) (None, None, 4) 516
=================================================================
Total params: 51,460
Trainable params: 51,460
Non-trainable params: 0
_________________________________________________________________
None
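If I read the summaries right, the (None, None, 128) shapes mean every layer is operating on a 3-D tensor: input_shape=(None, 139) declares an extra unnamed dimension on top of the 139 features, and Keras adds the batch dimension itself. Here is a minimal standalone check of those shapes (a sketch using plain tf.keras with no custom layers, separate from my agent code):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

m = Sequential([Dense(128, input_shape=(None, 139), activation='relu'),
                Dense(4, activation='softmax')])
print(m.input_shape)   # (None, None, 139): batch dim plus the (None, 139) I declared
print(m.output_shape)  # (None, None, 4): a 3-D output, matching the summaries above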
UPDATE WITH CODE BELOW: the DQN class with the model, the train method, etc.
class DQNAgent:
    def __init__(self):
        # Main model: gets trained every step
        self.model = self.create_model()

        # Target model: this is what we .predict against every step
        self.target_model = self.create_model()
        self.target_model.set_weights(self.model.get_weights())

        self.replay_memory = deque(maxlen=REPLAY_MEMORY_SIZE)
        # self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{MODEL_NAME}-{int(time.time())}")
        self.target_update_counter = 0

    def create_model(self):
        model = Sequential()
        model.add(Dense(128, input_shape=(None, 139), activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))
        model.add(Dense(4, activation='softmax'))

        # Model compile settings:
        opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

        # Compile model
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=opt,
            metrics=['accuracy']
        )
        print(model.summary())
        return model

    def update_replay_memory(self, transition):
        self.replay_memory.append(transition)

    def train(self, terminal_state):
        global export
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return

        minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

        current_states = np.array([transition[0] for transition in minibatch])
        current_qs_list = self.model.predict(current_states)

        new_states = np.array([transition[3] for transition in minibatch])
        future_qs_list = self.target_model.predict(new_states)

        X = []
        y = []

        # Now we need to enumerate our batches
        for index, (current_state, action, reward, new_state, done) in enumerate(minibatch):
            # If not a terminal state, get new q from future states, otherwise set it to 0
            # almost like with Q Learning, but we use just part of equation here
            if not done:
                max_future_q = np.max(future_qs_list[index])
                new_q = reward + DISCOUNT * max_future_q
            else:
                new_q = reward

            # Update Q value for given state
            qs = current_qs_list[index]
            print(qs, qs.shape)
            qs[0, action] = new_q

            # And append to our training data
            X.append(current_state)
            y.append(qs)

        # Fit on all samples as one batch, log only on terminal state
        self.model.fit(np.array(X), np.array(y), batch_size=MINIBATCH_SIZE, verbose=0,
                       shuffle=False, callbacks=[tensorboard] if terminal_state else None)

        # Update target network counter every episode
        if done:
            self.target_update_counter += 1

        # If counter reaches set value, update target network with weights of main network
        if self.target_update_counter > UPDATE_TARGET_EVERY:
            self.target_model.set_weights(self.model.get_weights())
            self.target_update_counter = 0

    # Queries main network for Q values given current observation space (environment state)
    def get_qs(self, state):
        return self.model.predict(scaler.transform(np.array(state).reshape(-1, *state.shape)))[0]
agent = DQNAgent()
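For context on what train() actually feeds the network: every state stored in the replay memory is the (1, 139) array produced by the reshape in the loop below, so stacking a minibatch of them adds another axis. A small sketch of just that stacking with dummy data (hypothetical sizes, no Keras involved):

import numpy as np
from collections import deque

# Hypothetical replay memory holding (1, 139)-shaped scaled states,
# mirroring how scaled_current is stored in the loop below:
replay_memory = deque(maxlen=1000)
for _ in range(64):
    transition = (np.random.rand(1, 139), 0, 0.0, np.random.rand(1, 139), False)
    replay_memory.append(transition)

current_states = np.array([t[0] for t in replay_memory])
print(current_states.shape)  # (64, 1, 139): the extra axis comes from the (1, 139) per-state shape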
The training loop:
for i in range(EPOCHS):
    print("EPOCH #", i, " starting, of ", EPOCHS, "epochs")
    if i == EPOCHS - 1:  # the last epoch, use test data set
        current_state, xdata = preprocess(test_filename)
    else:
        current_state, xdata = preprocess(dataframe)
    win_loss = 0
    step = 1

    # Iterate over episodes
    for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit="episodes"):
        # Update tensorboard step every episode
        # agent.tensorboard.step = episode

        # Restarting episode - reset episode reward and step number
        episode_reward = 0

        # Reset flag and start iterating until episode ends
        done = False
        while not done:
            done = are_we_done(current_state)

            # This part stays mostly the same, the change is to query a model for Q values
            if np.random.random() > epsilon:
                # Get action from Q table
                action = np.argmax(agent.get_qs(current_state))
                # print("Q-value action")
                action = action_check(current_state, action, orders)
            else:
                # Get random action
                # print("Random action")
                action = np.random.randint(0, 4)
                action = action_check(current_state, action, orders)

            (
                new_state,
                terminal_state,
                win_loss,
                close,
                total_win_loss,
                step,
                orders,
            ) = to_market(current_state, action, step, win_loss)

            reward = get_reward(win_loss, prev_win_loss)
            episode_reward += reward

            # Every step we update replay memory and train main network
            scaled_current = (scaler.transform(current_state)).reshape(
                1, current_state.shape[1]
            )
            scaled_new_state = (scaler.transform(new_state)).reshape(
                1, new_state.shape[1]
            )
            agent.update_replay_memory(
                (scaled_current, action, reward, scaled_new_state, done)
            )
            agent.train(done)

            # step += 1
            current_state = new_state
            prev_win_loss = win_loss

            if (
                current_state.flatten()[3] == 23 and current_state.flatten()[4] >= 57
            ):  # Close for trades between 23.57 and 00.15 due to swaps and crazy market
                market_close = True
                while market_close:
                    if (
                        current_state.flatten()[3] == 0
                        and current_state.flatten()[4] >= 15
                    ):
                        market_close = False
                    else:
                        sleep(10)
                        market_close = False

        # Append episode reward to a list and log stats (every given number of episodes)
        ep_rewards.append(episode_reward)
        with writer.as_default():
            tf.summary.scalar("Reward", episode_reward, step=episode)
        average_reward = sum(ep_rewards) / len(ep_rewards)
        min_reward = min(ep_rewards)
        max_reward = max(ep_rewards)
        agent.tensorboard.update_stats(
            reward_avg=average_reward,
            reward_min=min_reward,
            reward_max=max_reward,
            epsilon=epsilon,
        )

        # Save model, but only when min reward is greater or equal a set value
        if total_win_loss >= MIN_WIN_LOSS:
            agent.model.save(f"models/{MODEL_NAME}__{SYMBOL}__{int(time.time())}.model")

        # Decay epsilon
        if epsilon > MIN_EPSILON:
            epsilon *= EPSILON_DECAY
            epsilon = max(MIN_EPSILON, epsilon)
Print("EPOCH #", i, " done, of ", epochs, "epochs")
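Side note on the exploration schedule at the end of each episode: the decay is geometric, so epsilon after n episodes is roughly epsilon * EPSILON_DECAY ** n until it reaches MIN_EPSILON. A quick sketch with placeholder constants (assumed values, not the ones from my config):

import math

epsilon, EPSILON_DECAY, MIN_EPSILON = 1.0, 0.999, 0.01  # assumed placeholder values

# Episodes until epsilon hits the floor: solve epsilon * EPSILON_DECAY**n <= MIN_EPSILON for n
episodes_to_floor = math.ceil(math.log(MIN_EPSILON / epsilon) / math.log(EPSILON_DECAY))
print(episodes_to_floor)  # about 4603 episodes with these numbers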