I can build an LSTM that uses time steps to predict stock prices on historical data, but when I try to integrate it with a reinforcement learning model such as keras-rl's DQNAgent, so that user feedback on whether a prediction was good or bad can be used to retrain the model, I run into multiple errors about input shapes and dimensions. The input shape the LSTM was fitted on (time_step = 100) differs from the input the DQNAgent works with (one observation at a time). I'm using both of these models for the first time, so I'd appreciate help resolving this. Here is the error:
ValueError: Error when checking input: expected lstm_input to have 3 dimensions, but got array with shape (1, 6)
Code:
# Imports (added for completeness; assuming tensorflow.keras and keras-rl2)
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Input, LSTM, Dropout, Dense
from tensorflow.keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
# Define function to create the LSTM model
def create_lstm_model(input_shape):
    lstm_input = Input(shape=input_shape, name='lstm_input')
    lstm_layer = LSTM(50, return_sequences=True)(lstm_input)
    lstm_layer = Dropout(0.2)(lstm_layer)
    lstm_layer = LSTM(50, return_sequences=True)(lstm_layer)
    lstm_layer = Dropout(0.2)(lstm_layer)
    lstm_layer = LSTM(50)(lstm_layer)
    # num_actions is a global; it must be defined before this function is called
    output = Dense(num_actions, activation='linear')(lstm_layer)
    model = Model(inputs=lstm_input, outputs=output)
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
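# Quick probe (my own, on dummy data): the model only accepts 3-D input shaped
# (batch, time_step, n_features), which is exactly what the ValueError complains about
num_actions = 2
probe = create_lstm_model((100, 2))
print(probe.predict(np.zeros((1, 100, 2))).shape)  # -> (1, 2), OK
# probe.predict(np.zeros((1, 6)))                  # reproduces the ValueError above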
# Define function to preprocess data and create dataset for LSTM
def preprocess_data(dataset, time_step=1):
    features = ['Open', 'Close']  # Specify the features to be used
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset[features])
    dataX, dataY = [], []
    for i in range(len(scaled_data) - time_step):
        a = scaled_data[i:(i + time_step), :]
        dataX.append(a)
        dataY.append(scaled_data[i + time_step, :])
    return np.array(dataX), np.array(dataY)
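# For reference (my own check on synthetic data): with time_step=100 and the
# two features above, the windows come out 3-D, matching what the LSTM expects
demo = pd.DataFrame({'Open': np.arange(300.0), 'Close': np.arange(300.0)})
dx, dy = preprocess_data(demo, time_step=100)
print(dx.shape, dy.shape)  # (200, 100, 2) (200, 2)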
# Load and preprocess historical stock data
idf = pd.read_csv('your_data.csv')  # Replace 'your_data.csv' with your dataset file
x, y = preprocess_data(idf, time_step=100)
historical_data = idf
# Split data into training and testing sets
split_ratio = 0.9
split_index = int(len(x) * split_ratio)
x_train, x_test = x[:split_index], x[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
# Train the LSTM model
input_shape_lstm = (100, 2) # Adjust input shape based on the number of features
num_actions = 2 # Example: 0 - No action, 1 - Retrain LSTM
lstm_model = create_lstm_model(input_shape_lstm)
lstm_model.fit(x_train, y_train, epochs=10, batch_size=32, verbose=1)
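# Sanity check (mine): the fit above succeeds because x_train is already 3-D;
# the (1, 6) array from the error message only appears later, on the RL side
assert x_train.shape[1:] == input_shape_lstm       # (100, 2)
print(lstm_model.predict(x_train[:1]).shape)       # -> (1, num_actions)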
# Define and initialize the environment
class StockPredictionEnvironment:
    def __init__(self, lstm_model, historical_data):
        self.lstm_model = lstm_model
        self.historical_data = historical_data
        self.current_step = 0

    def reset(self):
        self.current_step = 0
        return self.get_state()

    def step(self, action):
        # Note: get_state() advances current_step before the reward is computed
        next_state = self.get_state()
        reward = self.calculate_reward()
        done = self.is_done()
        return next_state, reward, done, {}

    def get_state(self):
        if self.current_step < len(self.historical_data):
            state = self.historical_data.iloc[self.current_step]
            self.current_step += 1
            return state.values.reshape(-1)  # Flatten the row to a 1-D observation vector
        else:
            return None

    def calculate_reward(self):
        try:
            # This predict call raises the ValueError: the row comes in as (1, 6),
            # while lstm_model was built for input of shape (batch, 100, 2)
            predicted_price = self.lstm_model.predict(np.array([self.historical_data.iloc[self.current_step]]))[0][0]
            actual_price = self.historical_data.iloc[self.current_step + 1][0]
            reward = actual_price - predicted_price
            return reward
        except (KeyError, IndexError) as e:  # iloc past the last row raises IndexError, not KeyError
            print(f"Error: {e}")
            return 0

    def is_done(self):
        return self.current_step >= len(self.historical_data)
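# My suspicion: calculate_reward() feeds a single (1, 6)-shaped row into a model
# trained on (batch, 100, 2) windows, which matches the ValueError. Below is a
# hypothetical helper sketching the window-based reward I have in mind (assuming
# time_step=100 and the same two features; it also glosses over the MinMaxScaler,
# so treat it as a sketch, not a fix I know works):
def windowed_reward(lstm_model, data, step, time_step=100, features=('Open', 'Close')):
    # Need a full window of history and a next price to compare against
    if step < time_step or step + 1 >= len(data):
        return 0.0
    window = data[list(features)].iloc[step - time_step:step].values
    predicted = lstm_model.predict(window.reshape(1, time_step, len(features)))[0][0]
    actual = data[features[0]].iloc[step + 1]
    return float(actual - predicted)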
env = StockPredictionEnvironment(lstm_model, idf)
# Define and initialize the DQN agent
input_shape_dqn = (1, 6)  # (window_length, n_features): with window_length=1, keras-rl feeds the network batches of shape (batch, 1, 6)
model = create_lstm_model(input_shape_dqn)  # Reuse the same LSTM architecture for the Q-network
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=num_actions, nb_steps_warmup=10)
dqn.compile(optimizer='adam', metrics=['mae'])
# Train the DQN agent
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
# Evaluate the DQN agent
scores = dqn.test(env, nb_episodes=10, visualize=False)
print('Average score:', np.mean(scores.history['episode_reward']))
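For completeness, here is the direction I was considering for the agent side, so the Q-network matches the (window_length, n_features) observations keras-rl actually feeds it. The layer sizes are arbitrary and create_dqn_model is just my hypothetical helper, so this is a sketch rather than something I know is correct:
from tensorflow.keras.layers import Flatten

def create_dqn_model(window_length, n_features, nb_actions):
    # keras-rl feeds the Q-network batches shaped (batch, window_length, n_features),
    # so flatten the observation window before the Dense layers
    obs_input = Input(shape=(window_length, n_features), name='dqn_input')
    h = Flatten()(obs_input)
    h = Dense(32, activation='relu')(h)
    h = Dense(32, activation='relu')(h)
    q_values = Dense(nb_actions, activation='linear')(h)
    return Model(inputs=obs_input, outputs=q_values)

# e.g. dqn_model = create_dqn_model(window_length=1, n_features=6, nb_actions=num_actions)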