Baixe o app para aproveitar ainda mais
Prévia do material em texto
23/06/2020 15)22analiseImg02 Página 1 de 25about:srcdoc Análise de imagens by @Valnyr Lira junho de 2020 Campina Grande, PB 1. Introdução Avaliação das medidas (físicas e por imagem) como meio de estimar a massa das mangas. 2. Módulos externos Lista de módulos de funções externas importados. 23/06/2020 15)22analiseImg02 Página 2 de 25about:srcdoc In [1]: # Manipulação de dados import numpy as np import pandas as pd from scipy.stats import pearsonr # Visualização import matplotlib.pyplot as plt %matplotlib inline import seaborn as sns sns.set(style="ticks", color_codes=True) # Utils import os import sys import math import random from scipy import stats from time import time # Data science from sklearn.model_selection import train_test_split from sklearn.linear_model import LinearRegression, Lasso, Ridge from sklearn.model_selection import cross_val_score from sklearn import preprocessing from sklearn import metrics from sklearn.utils.extmath import density from sklearn.metrics import explained_variance_score, mean_absolute_error, mea n_squared_error, median_absolute_error 3. Carregar planilha de dados Selecionar planilha; Carregar planilha em dataframe; Apresentar informações; Apresentar estatísticas; Apresentar matriz de dispersão. 
# In [2]:
# Workbook with the measurements and the sheets it contains.
dataFile = 'dadosTotal.xlsx'
sheetName01 = 'Lote01-S'
sheetName02 = 'Lote02F-S'
sheetName03 = 'Lote02V-S'
sheetName04 = 'Lote02-S'
sheetName05 = 'Geral'

# Load the consolidated sheet into a dataframe.
totalData = pd.read_excel(dataFile, sheet_name=sheetName05)

# Show dataframe
totalData.head()
# Out[2] (the PDF export cut the table after column PP):
#       Manga   Cm  Lm  Am          CM          LM         DEM        PM      AM     CP     LP    DEP      PP
#  0  m001.jpg  105  87  78  526.182736  432.685283  476.554213  1527.211  178367  526.2  432.6  476.5  1618.1
#  1  m002.jpg   99  81  74  497.958532  411.186969  451.971849  1440.062  160440  497.8  410.6  451.6  1512.9
#  2  m004.jpg  121  97  94  615.314822  487.930919  547.160355  1773.045  235136  616.2  489.1  548.2  1842.0
#  3  m006.jpg  100  83  75  503.096329  420.123126  459.545490  1474.545  165862  502.7  421.0  459.9  1537.2
#  4  m007.jpg  109  80  72  555.258973  404.311917  473.299580  1555.732  175939  555.5  405.9  474.3  1614.5

# In [3]:
# Show dataframe info
totalData.info()
# Output:
#  <class 'pandas.core.frame.DataFrame'>
#  RangeIndex: 334 entries, 0 to 333
#  Data columns (total 16 columns):
#   #   Column  Non-Null Count  Dtype
#  ---  ------  --------------  -----
#   0   Manga   334 non-null    object
#   1   Cm      334 non-null    int64
#   2   Lm      334 non-null    int64
#   3   Am      334 non-null    int64
#   4   CM      334 non-null    float64
#   5   LM      334 non-null    float64
#   6   DEM     334 non-null    float64
#   7   PM      334 non-null    float64
#   8   AM      334 non-null    int64
#   9   CP      334 non-null    float64
#   10  LP      334 non-null    float64
#   11  DEP     334 non-null    float64
#   12  PP      334 non-null    float64
#   13  AP      334 non-null    int64
#   14  Mm      334 non-null    float64
#   15  Classe  334 non-null    object
#  dtypes: float64(9), int64(5), object(2)
#  memory usage: 41.9+ KB

# In [4]:
# Show dataframe summary
totalData.describe()
# Out[4] (the PDF export cut the table after column AM):
#                 Cm          Lm          Am          CM          LM         DEM           PM             AM
#  count  334.000000  334.000000  334.000000  334.000000  334.000000  334.000000   334.000000     334.000000
#  mean   108.799401   87.278443   80.197605  548.889792  438.310515  489.559815  1660.822985  189085.910180
#  std      8.186556    5.022204    4.826403   44.087081   28.108910   32.955991   210.434096   25335.433851
#  min     75.000000   69.000000   68.000000  384.488583  324.121115  370.180604  1178.966000  107626.000000
#  25%    104.000000   84.000000   78.000000  522.282935  421.678524  469.646016  1554.166000  173233.250000
#  50%    108.000000   87.000000   80.000000  544.018871  439.386613  487.624303  1630.964500  186750.000000
#  75%    113.750000   90.000000   83.000000  573.839378  455.286558  510.177449  1716.211500  204424.250000
#  max    143.000000  110.000000  101.000000  722.148994  564.155313  613.552994  3259.125000  295661.000000

# In [5]:
totalData.columns
# Out[5]:
#  Index(['Manga', 'Cm', 'Lm', 'Am', 'CM', 'LM', 'DEM', 'PM', 'AM', 'CP', 'LP',
#         'DEP', 'PP', 'AP', 'Mm', 'Classe'], dtype='object')

# In [6]:
# Define columns: every subset keeps the sample name ('Manga') and the
# target ('Mm') next to one family of measurements.
measuredCols = ['Manga', 'Cm', 'Lm', 'Mm']
matlabCols = ['Manga', 'CM', 'LM', 'DEM', 'PM', 'AM', 'Mm']
pythonCols = ['Manga', 'CP', 'LP', 'DEP', 'PP', 'AP', 'Mm']

# Separate totalData
measuredData = totalData[measuredCols]
matlabData = totalData[matlabCols]
pythonData = totalData[pythonCols]

# In [7]:
# Show dataframes (in a notebook only the last expression is displayed)
measuredData.head()
matlabData.head()
pythonData.head()
# Out[7]:
#       Manga     CP     LP    DEP      PP      AP     Mm
#  0  m001.jpg  526.2  432.6  476.5  1618.1  178334  364.0
#  1  m002.jpg  497.8  410.6  451.6  1512.9  160147  303.5
#  2  m004.jpg  616.2  489.1  548.2  1842.0  236063  539.5
#  3  m006.jpg  502.7  421.0  459.9  1537.2  166085  328.0
#  4  m007.jpg  555.5  405.9  474.3  1614.5  176689  336.5


# In [8]:
def reg_coef(X, y, label=None, color=None, **kwargs):
    """Annotate the current axes with the Pearson correlation of X and y.

    The signature follows what seaborn's ``PairGrid.map_lower`` /
    ``map_upper`` pass to a plotting callback; ``label``, ``color`` and
    ``kwargs`` are accepted for that reason and are not used.
    """
    ax = plt.gca()
    # Only the coefficient is shown; the p-value returned alongside it is
    # not needed here.
    r, _ = pearsonr(X, y)
    ax.annotate('r = {:.2f}'.format(r), xy=(0.5, 0.9),
                xycoords='axes fraction', ha='center')
    #ax.set_axis_off()


def _plot_scatter_matrix(frame, diag_kind="auto"):
    """Draw the regression pair plot of *frame* with r annotated on each panel."""
    grid = sns.pairplot(frame, diag_kind=diag_kind, kind="reg", markers='d',
                        corner=False,
                        plot_kws={'line_kws': {'color': 'red'},
                                  'scatter_kws': {'alpha': 0.5}})
    grid.map_lower(reg_coef)
    grid.map_upper(reg_coef)
    return grid


def _corr_table(frame, cmap_name):
    """Return the styled correlation matrix of the numeric columns of *frame*."""
    # 'Manga' holds file names; DataFrame.corr() raises on such a column
    # with pandas >= 2.0, so restrict the frame to numeric columns first.
    numeric = frame.select_dtypes(include='number')
    return (numeric.corr()
            .style.format('{:.3f}')
            .background_gradient(cmap=plt.get_cmap(cmap_name), axis=1))


# In [9]:
# Plot scatter matrix for measured data
gF = _plot_scatter_matrix(measuredData, diag_kind="auto")
# Out[9]: <seaborn.axisgrid.PairGrid at 0x1a19b50450>

# In [10]:
# Plot scatter matrix for Matlab data
gM = _plot_scatter_matrix(matlabData, diag_kind="auto")
# Out[10]: <seaborn.axisgrid.PairGrid at 0x1a1a138390>

# In [11]:
# Plot scatter matrix for Python data
gP = _plot_scatter_matrix(pythonData, diag_kind="kde")
# Out[11]: <seaborn.axisgrid.PairGrid at 0x1a1b688950>

# In [12]:
_corr_table(measuredData, 'Dark2')
# Out[12]:
#         Cm     Lm     Mm
#  Cm  1.000  0.721  0.861
#  Lm  0.721  1.000  0.902
#  Mm  0.861  0.902  1.000

# In [13]:
_corr_table(matlabData, 'Set1')
# Out[13]:
#          CM     LM    DEM     PM     AM     Mm
#  CM   1.000  0.709  0.937  0.368  0.939  0.826
#  LM   0.709  1.000  0.909  0.280  0.904  0.852
#  DEM  0.937  0.909  1.000  0.339  0.998  0.900
#  PM   0.368  0.280  0.339  1.000  0.349  0.557
#  AM   0.939  0.904  0.998  0.349  1.000  0.909
#  Mm   0.826  0.852  0.900  0.557  0.909  1.000

# In [14]:
_corr_table(pythonData, 'Set2')
# Out[14]:
#          CP     LP    DEP     PP     AP     Mm
#  CP   1.000  0.726  0.948  0.824  0.947  0.865
#  LP   0.726  1.000  0.907  0.757  0.903  0.922
#  DEP  0.948  0.907  1.000  0.850  0.998  0.956
#  PP   0.824  0.757  0.850  1.000  0.849  0.825
#  AP   0.947  0.903  0.998  0.849  1.000  0.960
#  Mm   0.865  0.922  0.956  0.825  0.960  1.000

# 4.
# Pre-processing
#   - Standardize the data;
#   - Split the data into training / test groups.

# In [15]:
# Choose data to process: measuredData, pythonData or matlabData
data = pythonData
#data = data[['Manga', 'Mm', 'CP']]

# Split data on X and y
X = data.drop(['Manga', 'Mm'], axis=1).values
y = data['Mm'].values
names = data['Manga'].values

# In [16]:
# Standardization of inputs
# NOTE(review): the scaling is computed on the whole dataset, i.e. before
# the train/test split below, so test rows take part in the scaling of the
# training rows. Kept as in the original run so the recorded results stay
# comparable.
X_scaled = preprocessing.scale(X)

# In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=21)

# In [18]:
print('Size of training and testing sets:')
print(' X: {}\n y: {}'.format(X.shape, y.shape))
print(' X train: {}\n y train: {}'.format(X_train.shape, y_train.shape))
print(' X test: {}\n y test: {}'.format(X_test.shape, y_test.shape))
# Output:
#  Size of training and testing sets:
#   X: (334, 5)
#   y: (334,)
#   X train: (233, 5)
#   y train: (233,)
#   X test: (101, 5)
#   y test: (101,)

# 5. Data analysis
# Apply regression:
#   Inputs: isolated / all / significant p-value
#   Models: Linear; Ridge; Lasso


# In [19]:
def run_regression(reg, X_train, X_test, y_train, y_test):
    """Fit *reg*, predict the test set and collect statistics and metrics.

    Parameters
    ----------
    reg : estimator
        Object providing ``fit``, ``predict``, ``score``, ``intercept_``
        and ``coef_`` (the file uses LinearRegression, Ridge and Lasso).
    X_train, X_test : 2-D arrays
        Input columns for the training and test samples.
    y_train, y_test : 1-D arrays
        Target values for the training and test samples.

    Returns
    -------
    dict
        ``'Regressor'``: class name of *reg*;
        ``'Statistics'``: dataframe indexed by ``b, a1, a2, ...`` with the
        columns ``Values, VAR, STD, t, p-values``;
        ``'Metrics'``: dataframe indexed by ``R2, ExtVar, MAE, MSE, MdAE,
        Duration`` with the columns ``Train`` and ``Test``;
        ``'yPred'``: predictions for ``X_test``.
    """
    # Get regression type
    reg_type = type(reg).__name__

    # Fit data and calculate respective duration
    t0 = time()
    reg.fit(X_train, y_train)
    train_time = time() - t0

    # Get predicted y and calculate respective duration
    t0 = time()
    y_pred = reg.predict(X_test)
    test_time = time() - t0

    # Regression parameters: intercept first, then one slope per input.
    # The full-precision values feed the t-statistics; the rounded copy is
    # only what gets reported.
    coefs = np.append(reg.intercept_, reg.coef_)
    params = np.round(coefs, 2)

    # Parameter names: 'b' for the intercept, 'a1', 'a2', ... for the slopes
    cf = ['b'] + ['a' + str(i) for i in range(1, len(coefs))]

    # Calculate statistics (ordinary-least-squares standard errors)
    # NOTE(review): these formulas are the ones of an unpenalized linear
    # fit; for Ridge/Lasso the reported VAR/STD/t/p-values are only
    # indicative.
    y_pred_train = reg.predict(X_train)
    newX = np.append(np.ones((len(X_train), 1)), X_train, axis=1)
    # Residual degrees of freedom: samples minus fitted parameters. The
    # same value is used for the residual variance and for the t
    # distribution.
    dof = len(newX) - newX.shape[1]
    MSE = np.sum((y_train - y_pred_train) ** 2) / dof

    var_p = MSE * (np.linalg.inv(np.dot(newX.T, newX)).diagonal())
    std_p = np.sqrt(var_p)
    ts_p = coefs / std_p
    # Two-sided p-value; the survival function avoids the cancellation of
    # 1 - cdf for large |t|.
    p_values = 2 * stats.t.sf(np.abs(ts_p), dof)

    # Save statistics in dataframe
    stats_dic = {'Values': params,
                 'VAR': np.round(var_p, 3),
                 'STD': np.round(std_p, 3),
                 't': np.round(ts_p, 3),
                 'p-values': np.round(p_values, 3)}
    stats_df = pd.DataFrame(stats_dic, index=cf)

    # Calculate metrics
    r2_train = reg.score(X_train, y_train)
    ExVar_train = explained_variance_score(y_train, y_pred_train)
    MAE_train = mean_absolute_error(y_train, y_pred_train)
    MSE_train = mean_squared_error(y_train, y_pred_train)
    MdAE_train = median_absolute_error(y_train, y_pred_train)

    r2_test = reg.score(X_test, y_test)
    ExVar_test = explained_variance_score(y_test, y_pred)
    MAE_test = mean_absolute_error(y_test, y_pred)
    MSE_test = mean_squared_error(y_test, y_pred)
    MdAE_test = median_absolute_error(y_test, y_pred)

    # Save metrics in dataframe
    met_dic = {'Train': np.round([r2_train, ExVar_train, MAE_train,
                                  MSE_train, MdAE_train, train_time], 3),
               'Test': np.round([r2_test, ExVar_test, MAE_test,
                                 MSE_test, MdAE_test, test_time], 3)}
    met_cols = ['R2', 'ExtVar', 'MAE', 'MSE', 'MdAE', 'Duration']
    met_df = pd.DataFrame(met_dic, index=met_cols)

    reg_result = {'Regressor': reg_type,
                  'Statistics': stats_df,
                  'Metrics': met_df,
                  'yPred': y_pred}
    return reg_result


# In [20]:
def show_regression(result):
    """Print the regressor name, statistics and metrics of one result.

    *result* is a dict as returned by ``run_regression``.
    """
    # Local names deliberately differ from the imported ``stats`` and
    # ``metrics`` modules.
    reg_descr = result['Regressor']
    stats_table = result['Statistics']
    metrics_table = result['Metrics']

    print('=' * 60)
    print('-' * 50)
    print('Regression model: {}'.format(reg_descr))
    print('-' * 50)
    print('Statistics:')
    print(stats_table)
    print()
    print('Metrics:')
    print(metrics_table)
    print('=' * 60)
    print()


# In [21]:
def plot_regression(reg_data, y_test, dataCols=('CP', 'LP', 'DEP', 'PP', 'AP')):
    """Plot predicted-vs-actual and error panels, one row per result.

    Parameters
    ----------
    reg_data : sequence of dict
        Results as returned by ``run_regression``.
    y_test : 1-D array
        Actual target values matching each result's ``'yPred'``.
    dataCols : sequence of str, optional
        Label used for each row; rows beyond its length are labelled with
        their position.
    """
    n_rows = len(reg_data)
    if n_rows == 0:
        return  # nothing to draw; a grid with zero rows cannot be created

    fig1, axes = plt.subplots(n_rows, 2, figsize=(10, 8), sharex='col',
                              squeeze=False)

    for i, result in enumerate(reg_data):
        ypred = result['yPred']
        # Label-based lookup: the metrics table is indexed by metric name.
        r2 = result['Metrics'].loc['R2', 'Test']

        erro = y_test - ypred
        mErro = np.mean(erro)

        row_label = dataCols[i] if i < len(dataCols) else str(i)
        ax_pred, ax_erro = axes[i]

        # Left panel: predictions against actual values with the y = x line
        ax_pred.scatter(y_test, ypred, label='yyy')
        ax_pred.plot([200, 800], [200, 800], ls="--", c="red", label='xx')
        #ax_pred.legend(loc='lower right')
        ax_pred.set_ylabel('y_' + row_label)
        ax_pred.text(200, 600, 'R2 = ' + str(r2))

        # Right panel: error per sample with its mean as a dashed line
        ax_erro.scatter(range(len(erro)), erro)
        ax_erro.axhline(y=mErro, color='r', linestyle='--')
        ax_erro.set_ylabel('erro_' + row_label)
        ax_erro.yaxis.tick_right()
        ax_erro.yaxis.set_label_position("right")

    # The x axes are shared per column, so only the bottom row is labelled.
    axes[-1, 0].set_xlabel('y_test')
    axes[-1, 1].set_xlabel('amostras')


# In [22]:
def plot_regression1(reg_data, y_test):
    """Plot predicted-vs-actual and error panels for a single result.

    *reg_data* is one dict as returned by ``run_regression``; *y_test*
    holds the actual target values matching its ``'yPred'``.
    """
    fig1, axes = plt.subplots(1, 2, figsize=(10, 5))
    ax_pred, ax_erro = axes.flatten()

    ypred = reg_data['yPred']
    # Label-based lookup: the metrics table is indexed by metric name.
    r2 = reg_data['Metrics'].loc['R2', 'Test']

    erro = y_test - ypred
    mErro = np.mean(erro)

    ax_pred.scatter(y_test, ypred, label='amostras')
    ax_pred.plot([200, 800], [200, 800], ls="--", c="red", label='ideal')
    ax_pred.legend(loc='lower right')
    ax_pred.set_ylabel('y_pred')
    ax_pred.text(200, 600, 'R2 = ' + str(r2))
    ax_pred.set_xlabel('y_test')

    ax_erro.scatter(range(len(erro)), erro)
    ax_erro.axhline(y=mErro, color='r', linestyle='--')
    ax_erro.set_ylabel('erro')
    ax_erro.yaxis.tick_right()
    ax_erro.yaxis.set_label_position("right")
    ax_erro.set_xlabel('amostras')


# In [23]:
# Run simple regression for each input variable
isolatedVar = []

# Choose regression model
rg = LinearRegression()  # LinearRegression() / Ridge(alpha=0.1) / Lasso(alpha=0.1)

for i in range(X_test.shape[1]):
    # i:i+1 keeps the column two-dimensional, as the estimators expect
    isolatedVar.append(run_regression(rg, X_train[:, i:i+1], X_test[:, i:i+1],
                                      y_train, y_test))
    show_regression(isolatedVar[i])
# Output recorded in the original run of 23/06/2020 (ruler lines omitted;
# the "input" tags were added here). t and p-values were produced by the
# original computation and may differ in the last digits from a re-run.
#
#  Regression model: LinearRegression                      (input CP)
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   399.01  4.861  2.205  180.967       0.0
#  a1   61.34  4.962  2.228   27.537       0.0
#  Metrics:
#               Train      Test
#  R2           0.767     0.710
#  ExtVar       0.767     0.716
#  MAE         26.698    31.612
#  MSE       1118.008  1735.489
#  MdAE        21.366    26.555
#  Duration     0.404     0.000
#
#  Regression model: LinearRegression                      (input LP)
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   400.31  3.333  1.826  219.256       0.0
#  a1   67.47  3.767  1.941   34.761       0.0
#  Metrics:
#               Train      Test
#  R2           0.839     0.866
#  ExtVar       0.839     0.867
#  MAE         21.481    21.759
#  MSE        768.576   799.948
#  MdAE        17.199    19.224
#  Duration     0.001     0.000
#
#  Regression model: LinearRegression                      (input DEP)
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   398.97  1.603  1.266  315.109       0.0
#  a1   68.85  1.713  1.309   52.606       0.0
#  Metrics:
#               Train      Test
#  R2           0.923     0.894
#  ExtVar       0.923     0.900
#  MAE         15.593    19.445
#  MSE        368.916   635.692
#  MdAE        13.741    17.038
#  Duration     0.001     0.000
#
#  Regression model: LinearRegression                      (input PP)
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   401.02  3.868  1.967  203.902       0.0
#  a1   75.44  5.644  2.376   31.754       0.0
#  Metrics:
#               Train      Test
#  R2           0.814     0.289
#  ExtVar       0.814     0.289
#  MAE         22.574    31.081
#  MSE        892.557  4253.377
#  MdAE        19.001    18.452
#  Duration     0.001     0.000
#
#  Regression model: LinearRegression                      (input AP)
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   399.17  1.483  1.218  327.833       0.0
#  a1   68.64  1.565  1.251   54.860       0.0
#  Metrics:
#               Train      Test
#  R2           0.929     0.908
#  ExtVar       0.929     0.914
#  MAE         15.166    18.289
#  MSE        341.308   547.899
#  MdAE        12.915    16.029
#  Duration     0.001     0.000

# In [24]:
plot_regression(isolatedVar, y_test)

# In [25]:
# Run multiple regression for all input variables
allVar = []
allVar.append(run_regression(LinearRegression(), X_train, X_test, y_train, y_test))
#allVar.append(run_regression(Ridge(alpha=0.1), X_train, X_test, y_train, y_test))
#allVar.append(run_regression(Lasso(alpha=0.1), X_train, X_test, y_train, y_test))

show_regression(allVar[0])
#show_regression(allVar[1])
#show_regression(allVar[2])
# Output recorded in the original run:
#  Regression model: LinearRegression
#  Statistics:
#      Values       VAR     STD        t  p-values
#  b   399.59     1.193   1.092  365.784     0.000
#  a1  -49.86  3830.642  61.892   -0.806     0.421
#  a2  -16.72  2215.473  47.069   -0.355     0.723
#  a3   24.38  9476.127  97.345    0.250     0.802
#  a4    4.27    19.599   4.427    0.965     0.336
#  a5  103.66   339.763  18.433    5.624     0.000
#  Metrics:
#              Train     Test
#  R2          0.944    0.941
#  ExtVar      0.944    0.944
#  MAE        12.958   14.801
#  MSE       266.836  354.967
#  MdAE       12.085   12.402
#  Duration    0.270    0.000

# In [26]:
plot_regression1(allVar[0], y_test)

# In [27]:
# Get inputs with p-value < 0.05
allPValues = allVar[0]['Statistics']['p-values']
pvOK = allPValues[allPValues < 0.05]

# Coefficient 'aK' belongs to input column K-1; 'b' is the intercept and
# does not correspond to any input column.
sl = [int(coefName[1:]) - 1 for coefName in pvOK.index if coefName != 'b']
inputs = [col in sl for col in range(X_train.shape[1])]
if not sl:
    raise ValueError('No input variable has a p-value below 0.05')

X_trainExt = X_train[:, sl]
X_testExt = X_test[:, sl]

# In [28]:
# Run multiple regression for significant p-values input variables
bestVar = []
# LinearRegression() / Ridge(alpha=0.1) / Lasso(alpha=0.1)
bestVar.append(run_regression(LinearRegression(), X_trainExt, X_testExt,
                              y_train, y_test))

# In [29]:
show_regression(bestVar[0])
# Output recorded in the original run (only a5, i.e. AP, was selected, so
# the figures equal those printed for the AP-only model):
#  Regression model: LinearRegression
#  Statistics:
#      Values    VAR    STD        t  p-values
#  b   399.17  1.483  1.218  327.833       0.0
#  a1   68.64  1.565  1.251   54.860       0.0
#  Metrics:
#              Train     Test
#  R2          0.929    0.908
#  ExtVar      0.929    0.914
#  MAE        15.166   18.289
#  MSE       341.308  547.899
#  MdAE       12.915   16.029
#  Duration    0.001    0.000

# In [30]:
plot_regression1(bestVar[0], y_test)


# In [31]:
def reg_summary(isoV, allV, bestV, isoNames=('CP', 'LP', 'DEP', 'PP', 'AP')):
    """Gather the metrics and statistics of all fitted models.

    Parameters
    ----------
    isoV : sequence of dict
        Single-input results from ``run_regression``, paired in order with
        *isoNames* (pairing stops at the shorter of the two).
    allV, bestV : sequence of dict
        Only the first element of each is used; they are labelled 'All'
        and 'Best'.
    isoNames : sequence of str, optional
        Column labels for the single-input results.

    Returns
    -------
    metrics : DataFrame
        Train/Test metrics of every model side by side, without the
        'Duration' row. Columns keep the repeated labels 'Train', 'Test',
        one pair per model in the order isoNames..., 'All', 'Best'.
    metrics_train, metrics_test : DataFrame
        The 'Train' (resp. 'Test') columns only, labelled by model.
    statistcs : DataFrame
        The 'Statistics' tables of every model stacked in the same order.
    """
    labelled = list(zip(isoNames, isoV))
    labelled += [('All', allV[0]), ('Best', bestV[0])]
    colNamesT = [modelName for modelName, _ in labelled]

    # Concat statistics
    statistcs = pd.concat([res['Statistics'] for _, res in labelled], axis=0)

    # Concat metrics
    metFrames = [res['Metrics'] for _, res in labelled]
    metrics = pd.concat(metFrames, axis=1).drop(['Duration'])

    # Train and test metrics, one column per model
    metrics_train = pd.concat([m['Train'] for m in metFrames], axis=1,
                              keys=colNamesT).drop(['Duration'])
    metrics_test = pd.concat([m['Test'] for m in metFrames], axis=1,
                             keys=colNamesT).drop(['Duration'])

    return metrics, metrics_train, metrics_test, statistcs


# In [32]:
met, met_train, met_test, res_stats = reg_summary(isolatedVar, allVar, bestVar)
met
# Out[32]: the values of Out[33] and Out[34] interleaved as Train/Test
# pairs per model (CP, LP, DEP, PP, AP, All, Best); the PDF export cut the
# table after the 11th column.

# In [33]:
met_train
# Out[33]:
#                CP       LP      DEP       PP       AP      All     Best
#  R2         0.767    0.839    0.923    0.814    0.929    0.944    0.929
#  ExtVar     0.767    0.839    0.923    0.814    0.929    0.944    0.929
#  MAE       26.698   21.481   15.593   22.574   15.166   12.958   15.166
#  MSE     1118.008  768.576  368.916  892.557  341.308  266.836  341.308
#  MdAE      21.366   17.199   13.741   19.001   12.915   12.085   12.915

# In [34]:
met_test
# Out[34]:
#                CP       LP      DEP        PP       AP      All     Best
#  R2         0.710    0.866    0.894     0.289    0.908    0.941    0.908
#  ExtVar     0.716    0.867    0.900     0.289    0.914    0.944    0.914
#  MAE       31.612   21.759   19.445    31.081   18.289   14.801   18.289
#  MSE     1735.489  799.948  635.692  4253.377  547.899  354.967  547.899
#  MdAE      26.555   19.224   17.038    18.452   16.029   12.402   16.029

# In [35]:
res_stats
# Out[35]:
#      Values       VAR     STD        t  p-values
#  b   399.01     4.861   2.205  180.967     0.000
#  a1   61.34     4.962   2.228   27.537     0.000
#  b   400.31     3.333   1.826  219.256     0.000
#  a1   67.47     3.767   1.941   34.761     0.000
#  b   398.97     1.603   1.266  315.109     0.000
#  a1   68.85     1.713   1.309   52.606     0.000
#  b   401.02     3.868   1.967  203.902     0.000
#  a1   75.44     5.644   2.376   31.754     0.000
#  b   399.17     1.483   1.218  327.833     0.000
#  a1   68.64     1.565   1.251   54.860     0.000
#  b   399.59     1.193   1.092  365.784     0.000
#  a1  -49.86  3830.642  61.892   -0.806     0.421
#  a2  -16.72  2215.473  47.069   -0.355     0.723
#  a3   24.38  9476.127  97.345    0.250     0.802
#  a4    4.27    19.599   4.427    0.965     0.336
#  a5  103.66   339.763  18.433    5.624     0.000
#  b   399.17     1.483   1.218  327.833     0.000
#  a1   68.64     1.565   1.251   54.860     0.000

# In [36]:
# Despite its name, this is the file name of the output workbook.
sheetName = 'analise_' + bestVar[0]['Regressor'] + '.xlsx'
sheetName
# Out[36]: 'analise_LinearRegression.xlsx'

# In [37]:
# One worksheet per table; the context manager saves and closes the file.
with pd.ExcelWriter(sheetName, mode='w') as writer:
    met.to_excel(writer, sheet_name='metrics', float_format='%.3f')
    met_train.to_excel(writer, sheet_name='met_train', float_format='%.3f')
    met_test.to_excel(writer, sheet_name='met_test', float_format='%.3f')
    res_stats.to_excel(writer, sheet_name='stats', float_format='%.3f')
Compartilhar