algoritmo_lstm_para_series_temporais

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_error
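# The plt.savefig calls below assume an "lstm_1/" directory already exists;
# creating it up front (a small safeguard, an assumption about the runtime
# environment rather than part of the model itself) avoids a FileNotFoundError.
import os
os.makedirs('lstm_1', exist_ok=True)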
def evaluate_performance(y_true, y_pred,vetor_rmse,vetor_mae):
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    vetor_rmse.append(rmse)
    vetor_mae.append(mae)
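# The script expects 'tabela_final.csv' to contain a date column named 'data'
# and a daily new-cases column named 'casosNovos' (the column sliced as 1:2
# below appears to be 'casosNovos'). A minimal, hypothetical sketch of such a
# file with synthetic values, useful only for a dry run when the real data is
# not available:
# datas = pd.date_range('2020-03-01', periods=300, freq='D')
# casos = np.random.randint(0, 60000, size=len(datas))
# pd.DataFrame({'data': datas, 'casosNovos': casos}).to_csv('tabela_final.csv', index=False)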
tabela_final = pd.read_csv('tabela_final.csv')
#tabela_final = pd.read_csv('tabela.csv')
#tabela_final = pd.read_csv('tabelaSV.csv')
tabela_final['data'] = pd.to_datetime(tabela_final['data'])
# Splitting off the training set for each train/test proportion
vetor_proporcoes = [0.3, 0.5, 0.7, 0.8]
vetor_string = ['30-70','50-50','70-30','80-20']
vetor_rmse = []
vetor_mae = []
cont = 0
for v in vetor_proporcoes:
    limite = round(len(tabela_final) * v)
    tabela_treinobase = tabela_final.iloc[0:limite]
    tabela_treino_processada = tabela_treinobase.iloc[:, 1:2].values
    tabela_testebase = tabela_final.iloc[limite:]
    tabela_teste_processada = tabela_testebase.iloc[:, 1:2].values
    x_data = tabela_testebase.iloc[:, 0:1].values
    casos_total = pd.concat((tabela_treinobase, tabela_testebase), axis=0)
    casos_total = casos_total.drop(columns='data')
    # Scaling the training values
    rangefor = 1
    scaler = MinMaxScaler(feature_range=(0, 1))
    cases_training_scaled = scaler.fit_transform(tabela_treino_processada)
    # Convert Training Data to Right Shape
    features_set = []
    labels = []
    for i in range(rangefor, len(cases_training_scaled)):
        features_set.append(cases_training_scaled[i-rangefor:i, :])
        labels.append(cases_training_scaled[i, :])
    features_set, labels = np.array(features_set), np.array(labels)
    # Reshape to (samples, timesteps, features), as expected by the LSTM layer
    features_set = np.reshape(features_set, (features_set.shape[0], features_set.shape[1], 1))
    # Training The LSTM
    model = Sequential()
    # Creating LSTM and Dropout Layers
    model.add(LSTM(units=50, return_sequences=True, input_shape=(features_set.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    # Creating Dense Layer
    model.add(Dense(units=1))
    # Model Compilation
    model.compile(optimizer='adam', loss='mean_squared_error')
    # Algorithm Training
    model.fit(features_set, labels, epochs=100, batch_size=32)
    # Testing our LSTM
    # Include the last `rangefor` points before the test window so the first
    # test sample has the history it needs
    test_inputs = casos_total[len(casos_total) - len(tabela_testebase) - rangefor:]['casosNovos'].values
    test_inputs = test_inputs.reshape(-1, 1)
    test_inputs = scaler.transform(test_inputs)
    test_features = []
    for i in range(rangefor, len(test_inputs)):
        test_features.append(test_inputs[i-rangefor:i, :])
    test_features = np.array(test_features)
    test_features = np.reshape(test_features, (test_features.shape[0], test_features.shape[1], 1))
    # Making Predictions
    predictions = model.predict(test_features)
    predictions = scaler.inverse_transform(predictions)
    array_real = tabela_teste_processada[:, 0].astype('int32')
    array_pedc = predictions[:, 0].astype('int32')
    data1 = str(x_data[0]).replace('[\'', '').split('T')
    data2 = str(x_data[len(x_data)-1]).replace('[\'', '').split('T')
    plt.rcParams['xtick.labelsize'] = 18
    plt.rcParams['ytick.labelsize'] = 18
    plt.figure(figsize=(30, 10))
    plt.plot(x_data, array_real, color='blue', label='Casos Reais')
    plt.plot(x_data, array_pedc, color='red', label='Casos Preditos')
    plt.title('LSTM: Gráfico real e predito coronavírus Brasil (' + str(data1[0]) + ' - ' + str(data2[0]) + ')', fontsize=22)
    plt.xlabel('Data', fontsize=22)
    plt.ylabel('Casos Novos', fontsize=22)
    plt.legend(prop={"size": 18})
    plt.savefig("lstm_1/grafico_comp_" + vetor_string[cont] + ".png")
    plt.close('all')
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    axs[0].hist(array_real, bins=20, color='blue', alpha=0.5)
    axs[0].set_xlabel('Valores Reais', fontsize=22)
    axs[0].set_ylabel('Frequência', fontsize=22)
    axs[0].set_title('Histograma dos Valores Reais', fontsize=22)
    axs[1].hist(array_pedc, bins=20, color='red', alpha=0.5)
    axs[1].set_xlabel('Valores Preditos', fontsize=22)
    axs[1].set_ylabel('Frequência', fontsize=22)
    axs[1].set_title('Histograma dos Valores Preditos', fontsize=22)
    fig.suptitle('LSTM: Histograma real e preditivo coronavírus Brasil (' + str(data1[0]) + ' - ' + str(data2[0]) + ')', fontsize=22)
    plt.savefig("lstm_1/histograma_" + vetor_string[cont] + ".png")
    plt.close('all')
    evaluate_performance(array_real, array_pedc, vetor_rmse, vetor_mae)
    cont = cont + 1
fig = plt.figure(figsize=(30,10))
y_std = np.std(vetor_rmse, ddof=1)
y_std2 = np.std(vetor_mae, ddof=1)
yerr = np.repeat(y_std, 4)
yerr2 = np.repeat(y_std2, 4)
plt.rcParams['xtick.labelsize'] = 22
plt.rcParams['ytick.labelsize'] = 22
plt.errorbar(vetor_string, vetor_rmse, yerr = yerr, label='RMSE')
plt.errorbar(vetor_string, vetor_mae, yerr = yerr2, label='MAE')
plt.legend(loc='lower right',fontsize=22)
plt.title('LSTM: RMSE e MAE',fontsize=22)
plt.xlabel('Base',fontsize=22)
plt.ylabel('Valor',fontsize=22)
plt.savefig("lstm_1/rmse-mae.png")
plt.close('all')
