我是机器学习新手,正在尝试为整数序列模型开发一个简单的 RNN(数据来自 Kaggle 竞赛 "Integer Sequence Learning")。
我的数据集取自Kaggle竞赛,以下是我的代码:
import numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import SimpleRNN
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.preprocessing.sequence import pad_sequences
# convert an array of values into a dataset matrix
def create_dataset(dataset, window_size=1):
    """Build sliding-window inputs and next-step targets from a 2-D array.

    Only column 0 of ``dataset`` is read. For every start index ``i`` the
    input sample is ``dataset[i:i + window_size, 0]`` and the target is the
    value immediately after that window, ``dataset[i + window_size, 0]``.

    Args:
        dataset: 2-D array indexed as ``dataset[row, column]``.
        window_size: Number of consecutive rows in each input sample.

    Returns:
        A ``(dataX, dataY)`` tuple of numpy arrays: the stacked windows and
        the matching targets. Both are empty when ``dataset`` has no more
        than ``window_size`` rows.
    """
    dataX, dataY = [], []
    # The target for window i lives at row i + window_size, so the last
    # usable start index is len(dataset) - window_size - 1. The previous
    # bound subtracted one more and silently dropped the final sample.
    for i in range(len(dataset) - window_size):
        dataX.append(dataset[i:i + window_size, 0])
        dataY.append(dataset[i + window_size, 0])
    return numpy.array(dataX), numpy.array(dataY)
# Fix the random seed for reproducibility.
numpy.random.seed(7)

# Load the second column of the CSV, skipping the last three lines of the file.
dataframe = pd.read_csv('G:/Python/integer_sequencing/train.csv', usecols=[1], engine='python', skipfooter=3)
dataset = dataframe.values
# NOTE(review): the float32 cast below was disabled in the original. If the
# selected column is not numeric, MinMaxScaler will fail - confirm the dtype.
#dataset = dataset.astype('float32')

# Chronological split: first 67 per cent for training, the rest for testing.
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size], dataset[train_size:len(dataset)]

# Normalize each split to the [0, 1] range.
# NOTE(review): the test split is scaled with its own independently fitted
# scaler, so train and test values are not on a shared scale - confirm that
# this is intended.
scaler_train = MinMaxScaler(feature_range=(0, 1))
scaler_test = MinMaxScaler(feature_range=(0, 1))
train = scaler_train.fit_transform(train)
test = scaler_test.fit_transform(test)

# Turn each split into (window, next value) pairs. These assignments were
# missing, so trainX/trainY/testX were used below before being defined.
window_size = 1
trainX, trainY = create_dataset(train, window_size)
testX, testY = create_dataset(test, window_size)

# Reshape input to be [samples, time steps, features].
trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
print(trainX.shape)
print(trainY.shape)
我得到如下错误:
我该如何修复它?请帮忙。。。
PS:我正在使用 "Time series prediction" 教程作为示例。