Baixe o app para aproveitar ainda mais
Prévia do material em texto
# LSTM forecast of daily new coronavirus cases ('casosNovos') read from
# 'tabela_final.csv'.
#
# For each train/test proportion the script fits a stacked LSTM on the training
# slice, predicts the test slice, saves a real-vs-predicted line chart and a
# pair of histograms, and records RMSE and MAE.  A final chart compares the
# metrics across all proportions.  All figures are written to OUTPUT_DIR.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# Directory that receives every generated figure.
OUTPUT_DIR = "lstm_1"


def evaluate_performance(y_true, y_pred, vetor_rmse, vetor_mae):
    """Compute RMSE and MAE for one split and append them to the given lists.

    Args:
        y_true: Observed values.
        y_pred: Predicted values, same length as ``y_true``.
        vetor_rmse: List that receives the RMSE (mutated in place).
        vetor_mae: List that receives the MAE (mutated in place).
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    mae = mean_absolute_error(y_true, y_pred)
    vetor_rmse.append(rmse)
    vetor_mae.append(mae)


def _make_windows(series, window):
    """Turn a scaled (n, 1) series into LSTM inputs and next-step labels.

    Returns a ``(features, labels)`` pair: ``features`` has shape
    ``(n - window, window, 1)`` and ``labels`` holds the value that follows
    each window.
    """
    positions = range(window, len(series))
    features = np.array([series[i - window:i, :] for i in positions])
    labels = np.array([series[i, :] for i in positions])
    return features.reshape(-1, window, 1), labels


def _build_model(window):
    """Build and compile the four-layer stacked LSTM regressor."""
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(window, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    # Last recurrent layer returns only its final state for the Dense head.
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


def _plot_comparison(x_data, real, predicted, periodo, path):
    """Save the real-vs-predicted line chart for one split."""
    plt.figure(figsize=(30, 10))
    plt.plot(x_data, real, color='blue', label='Casos Reais')
    plt.plot(x_data, predicted, color='red', label='Casos Preditos')
    plt.title('LSTM: Gráfico real e predito coronavírus Brasil (' + periodo + ')',
              fontsize=22)
    plt.xlabel('Data', fontsize=22)
    plt.ylabel('Casos Novos', fontsize=22)
    plt.legend(prop={"size": 18})
    plt.savefig(path)
    plt.close('all')


def _plot_histograms(real, predicted, periodo, path):
    """Save side-by-side histograms of real and predicted values."""
    fig, axs = plt.subplots(1, 2, figsize=(12, 5))
    axs[0].hist(real, bins=20, color='blue', alpha=0.5)
    axs[0].set_xlabel('Valores Reais', fontsize=22)
    axs[0].set_ylabel('Frequência', fontsize=22)
    axs[0].set_title('Histograma dos Valores Reais', fontsize=22)
    axs[1].hist(predicted, bins=20, color='red', alpha=0.5)
    axs[1].set_xlabel('Valores Preditos', fontsize=22)
    axs[1].set_ylabel('Frequência', fontsize=22)
    axs[1].set_title('Histograma dos Valores Preditos', fontsize=22)
    fig.suptitle('LSTM: Histograma real e preditivo coronavírus Brasil (' + periodo + ')',
                 fontsize=22)
    plt.savefig(path)
    plt.close('all')


tabela_final = pd.read_csv('tabela_final.csv')
# Alternative datasets previously used here: 'tabela.csv', 'tabelaSV.csv'.
tabela_final['data'] = pd.to_datetime(tabela_final['data'])

# Train/test proportions and the labels used in output file names.
vetor_proporcoes = [0.3, 0.5, 0.7, 0.8]
vetor_string = ['30-70', '50-50', '70-30', '80-20']
vetor_rmse = []
vetor_mae = []

# Number of past time steps fed to the network for each prediction.
rangefor = 1

# savefig does not create missing directories, so make sure it exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

plt.rcParams['xtick.labelsize'] = 18
plt.rcParams['ytick.labelsize'] = 18

for v, rotulo in zip(vetor_proporcoes, vetor_string):
    # Chronological split: the first `limite` rows train, the rest test.
    limite = round(len(tabela_final) * v)
    tabela_treinobase = tabela_final.iloc[:limite]
    tabela_testebase = tabela_final.iloc[limite:]
    # NOTE(review): the target is taken positionally (column 1) here but by
    # name ('casosNovos') for the test inputs below — assumes both refer to
    # the same column; confirm against the CSV layout.
    tabela_treino_processada = tabela_treinobase.iloc[:, 1:2].values
    tabela_teste_processada = tabela_testebase.iloc[:, 1:2].values
    x_data = tabela_testebase.iloc[:, 0].values

    # Scale using training data only, then build the supervised windows.
    scaler = MinMaxScaler(feature_range=(0, 1))
    cases_training_scaled = scaler.fit_transform(tabela_treino_processada)
    features_set, labels = _make_windows(cases_training_scaled, rangefor)

    model = _build_model(rangefor)
    model.fit(features_set, labels, epochs=100, batch_size=32)

    # The test inputs start `rangefor` rows before the test slice so that the
    # first test row already has a full window of history.
    test_inputs = tabela_final['casosNovos'].values[limite - rangefor:]
    test_inputs = scaler.transform(test_inputs.reshape(-1, 1))
    test_features, _ = _make_windows(test_inputs, rangefor)

    predictions = scaler.inverse_transform(model.predict(test_features))

    # Both series are truncated to integers before plotting and scoring.
    array_real = tabela_teste_processada[:, 0].astype('int32')
    array_pedc = predictions[:, 0].astype('int32')

    # Keep only the date part of the first and last test timestamps.
    periodo = (str(x_data[0]).split('T')[0] + ' - '
               + str(x_data[-1]).split('T')[0])

    _plot_comparison(
        x_data, array_real, array_pedc, periodo,
        os.path.join(OUTPUT_DIR, "grafico_comp_" + rotulo + ".png"))
    _plot_histograms(
        array_real, array_pedc, periodo,
        os.path.join(OUTPUT_DIR, "histograma_" + rotulo + ".png"))

    evaluate_performance(array_real, array_pedc, vetor_rmse, vetor_mae)

# Summary chart: one point per split, with the sample standard deviation of
# each metric across splits drawn as a constant error bar.
plt.rcParams['xtick.labelsize'] = 22
plt.rcParams['ytick.labelsize'] = 22
plt.figure(figsize=(30, 10))
yerr = np.repeat(np.std(vetor_rmse, ddof=1), len(vetor_rmse))
yerr2 = np.repeat(np.std(vetor_mae, ddof=1), len(vetor_mae))
plt.errorbar(vetor_string, vetor_rmse, yerr=yerr, label='RMSE')
plt.errorbar(vetor_string, vetor_mae, yerr=yerr2, label='MAE')
plt.legend(loc='lower right', fontsize=22)
plt.title('LSTM: RMSE e MAE', fontsize=22)
plt.xlabel('Base', fontsize=22)
plt.ylabel('Valor', fontsize=22)
plt.savefig(os.path.join(OUTPUT_DIR, "rmse-mae.png"))
plt.close('all')
Compartilhar