我正在尝试拟合一个LSTM模型,但在进行预测时,预测值与测试数据似乎吻合得不太好。结果可以在这里看到。
这是我的代码:
依赖项:
import pandas as pd
from pandas_datareader import data as pdr
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
%matplotlib inline
#ML stuff
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
导入数据:
# Fetch the price history for ticker 0118.KL. yf.pdr_override() is called
# first so that the pandas_datareader call below is routed through yfinance.
yf.pdr_override()
df = pdr.get_data_yahoo("0118.KL", start="2018-01-01", end="2020-04-30")

# Move the index into an ordinary column, then normalise every column label
# to lower case with underscores instead of spaces.
df = df.reset_index(drop=False)
df.columns = [str(label).lower().replace(' ', '_') for label in df.columns]

# Drop the listed price/volume columns and index what is left by date.
unused_columns = ['open', 'high', 'low', 'close', 'volume']
df = df.drop(columns=unused_columns).set_index('date')
df.tail()
数据准备:
# Forecast horizon and look-back window: predict 10 days ahead, using the
# previous 10 days to derive each future value.
n_input = 10
n_features = 1

# Hold out the final n_input rows as the test set; everything before is training data.
train = df.iloc[:-n_input]
test = df.iloc[-n_input:]

# The scaler is fitted on the training rows only and then applied to both
# splits. The results are 2-D arrays of [time_steps, features], with a single
# feature because the series is univariate.
scaler = MinMaxScaler(feature_range=(0, 1))
train = scaler.fit_transform(train)
test = scaler.transform(test)
训练模型:
# Build training samples from the scaled series: each input is a window of
# n_input consecutive values drawn from `train`, with targets also taken from
# `train`. batch_size is the number of samples shown before the network's
# weights are updated.
generator = TimeseriesGenerator(train, train, length=n_input, batch_size=128)

# Single LSTM layer -> dropout -> one-unit dense output, trained on MSE.
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_input, n_features)))
model.add(Dropout(0.2))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Model.fit accepts generator / Sequence inputs directly; fit_generator is
# deprecated in tf.keras and absent from later releases.
# NOTE(review): assumes Keras >= 2.3 / TensorFlow >= 2.1 -- confirm the
# installed version before relying on this.
model.fit(generator, epochs=100)
预测:
# Recursive multi-step forecast over the last n_input dates of df.
pred_list = []  # list of arrays, each holding one scaled prediction

# Seed the input window with the last n_input scaled training values, shaped
# (1, n_input, n_features) as a single-sample batch for model.predict.
batch = train[-n_input:].reshape(1, n_input, n_features)

# Moving window: predict one step, drop the oldest value from the window,
# append the prediction, and use the updated window for the next step.
for _ in range(n_input):
    next_value = model.predict(batch)[0]  # array containing a single value
    pred_list.append(next_value)
    # axis=1 is the time-step axis of the batch.
    batch = np.append(batch[:, 1:, :], [[next_value]], axis=1)

# Undo the scaling and align the predictions with the last n_input dates of df.
df_predict = pd.DataFrame(
    scaler.inverse_transform(pred_list),
    index=df[-n_input:].index,
    columns=['Predictions'],
)
# Side-by-side frame: the original column(s) plus 'Predictions', which is
# populated only on the forecast dates.
df_test = pd.concat([df, df_predict], axis=1)
显示上面的图形:
# Plot the last n_input actual adjusted closes against the predictions (red).
actual = df_test['adj_close'][-n_input:]
predicted = df_test['Predictions']

plt.figure(figsize=(12, 4))
plt.plot(actual)
plt.plot(predicted, color="r")
plt.show()
为何会这样呢?感谢您的阅读,不胜感激。