Baixe o app para aproveitar ainda mais
Prévia do material em texto
GGPLOT Larissa Avila Matos 1/57 Conjunto de dados: USA Colleges data Para a análise vamos considerar o conjunto de dados disponível em James, Witten, Hastie e Tibshirani (2014), An Introduction to Statistical Learning, que contém informações sobre faculdades nos EUA. college <- read.csv("College.csv", header=TRUE, row.names=1) str(college) 'data.frame': 777 obs. of 13 variables: $ Private : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ... $ Apps : int 1660 2186 1428 417 193 587 353 1899 1038 582 ... $ Accept : int 1232 1924 1097 349 146 479 340 1720 839 498 ... $ Enroll : int 721 512 336 137 55 158 103 489 227 172 ... $ Top10perc : int 23 16 22 60 16 38 17 37 30 21 ... $ Undergrad : int 3422 3910 1135 573 1118 719 646 1626 1279 877 ... $ P.Undergrad: int 16 31 9 11 78 6 36 2 24 9 ... $ Outstate : int 7440 12280 11250 12960 7560 13500 13290 13868 15595 10468 ... $ Other.Exp : int 5950 8700 5315 6775 6420 4510 7720 6126 5200 5840 ... $ PhD : int 70 29 53 92 76 67 90 89 79 40 ... $ S.F.Ratio : num 18.1 12.2 12.9 7.7 11.9 9.4 11.5 13.7 11.3 11.5 ... $ Expend : int 7041 10527 8735 19016 10922 9727 8861 11487 11644 8991 ... $ Grad.Rate : int 60 56 54 59 15 55 63 73 80 52 ... 2/57 Uma descrição do conjunto de dados é dada por: row.names contém os nomes das faculdades. Private: indicador público/privado. Apps: Número de pedidos recebidos (inscrições) por 1000. Accept: Número de candidatos aceitos por 1000. Enroll: Número de novos alunos matriculados por 1000. Top10perc: Porcentagem de novos alunos entre os 10% melhores do ensino médio. Undergrad: Número de alunos de graduação por 1000. P.Undergrad: Porcentagem de alunos de graduação em meio período. Outstate: Mensalidades fora do estado por 1000. Other.Exp: Soma dos custos médios (moradia, livros, gastos pessoais, ...) por 1000. PhD: Percentagem de docentes com doutorado. S.F.Ratio: Relação aluno/docente. Expend: Despesas instrucionais por 1000. Grad.Rate: Taxa de Graduação. 
3/57 require(ggplot2) college <- read.csv("/Users/Larissa/Downloads/Seminario_ggplot/College.csv", header=TRUE, row.names=1) college[,c(2,3,4,6,8,9,12)]<-college[,c(2,3,4,6,8,9,12)]/1000 attach(college) str(college) 'data.frame': 777 obs. of 13 variables: $ Private : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ... $ Apps : num 1.66 2.186 1.428 0.417 0.193 ... $ Accept : num 1.232 1.924 1.097 0.349 0.146 ... $ Enroll : num 0.721 0.512 0.336 0.137 0.055 0.158 0.103 0.489 0.227 0.172 ... $ Top10perc : int 23 16 22 60 16 38 17 37 30 21 ... $ Undergrad : num 3.422 3.91 1.135 0.573 1.118 ... $ P.Undergrad: int 16 31 9 11 78 6 36 2 24 9 ... $ Outstate : num 7.44 12.28 11.25 12.96 7.56 ... $ Other.Exp : num 5.95 8.7 5.32 6.78 6.42 ... $ PhD : int 70 29 53 92 76 67 90 89 79 40 ... $ S.F.Ratio : num 18.1 12.2 12.9 7.7 11.9 9.4 11.5 13.7 11.3 11.5 ... $ Expend : num 7.04 10.53 8.73 19.02 10.92 ... $ Grad.Rate : int 60 56 54 59 15 55 63 73 80 52 ... 4/57 g1 <- ggplot(college, aes(Private)) + geom_bar() g1 0 200 400 No Yes Private co u n t 5/57 g1.1 <- ggplot(college, aes(Private)) + geom_bar(col=c("#fb8072","#80b1d3"), ## contorno das barras fill=c("#fb8072","#80b1d3"), ## preenchimento barras alpha = 1) + labs(title="Gráfico de barras", x="", y="Frequência",caption="College data", subtitle="Variável Private") + theme(plot.title=element_text(size=15, family="sans", face="bold", lineheight=1.2, angle=0, hjust=0.5, vjust=0.5), ## Título plot.subtitle=element_text(size=10, face="plain"), ## Subtítulo plot.caption=element_text(size=10,color="blue"), axis.title.x=element_text(size=15), ## Título eixo X axis.title.y=element_text(size=15), ## Título eixo Y axis.text.x=element_text(size=10, angle = 30, vjust=.5), ## Texto eixo X axis.text.y=element_text(size=10) ## Texto eixo Y ) 6/57 g1.1 0 200 400 No Yes F re q u ê n ci a Variável Private Gráfico de barras College data 7/57 g2 <- ggplot(college, aes(x = factor(1), fill = Private)) + geom_bar(width = 0.5) + coord_polar(theta = 
"y") + scale_fill_manual(values=c("#fb8072","#80b1d3")) ## ou scale_fill_brewer(palette="Blues") g2 g2.1 <- g2 + theme_void() g2.1 0 200 400 600 1 count fa c to r( 1 ) Private No Yes g2 Private No Yes g2.1 http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3 8/57 http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3 g3 <- ggplot(college, aes(S.F.Ratio)) + geom_histogram() g3 0 40 80 120 10 20 30 40 S.F.Ratio co u n t 9/57 g3.1 <- ggplot(college, aes(S.F.Ratio)) + geom_histogram(breaks=seq(0, 41, by = 1.3), col="black", ## contorno das barras fill="#fb8072", ## preenchimento barras alpha = 1) + labs(title="Histograma para a Relação aluno/docente") + labs(x="Razão", y="Frequência") + xlim(c(0,41)) + ylim(c(0,121)) g3.1 0 25 50 75 100 125 0 10 20 30 40 Razão Fr eq uê nc ia Histograma para a Relação aluno/docente 10/57 g3.2 <- ggplot(college, aes(S.F.Ratio)) + geom_histogram(aes(y = ..density..),binwidth = 1, fill = "#fb8072", color = "black") g3.2 0.00 0.05 0.10 10 20 30 40 S.F.Ratio d e n si ty 11/57 x <- seq(0, 41, length.out=100) df <- data.frame(x = x, y = dnorm(x, mean(college$S.F.Ratio), sd(college$S.F.Ratio))) g3.2 <- g3.2 + geom_line(data = df, aes(x = x, y = y), color = "black", size=1) g3.2 0.00 0.05 0.10 0 10 20 30 40 S.F.Ratio d e n si ty 12/57 x <- seq(-4, 4, length.out=100) df <- data.frame(x = x, y = dnorm(x)) g3.3 <- ggplot(college, aes((S.F.Ratio-mean(S.F.Ratio))/sd(S.F.Ratio))) + geom_histogram(aes(y = ..density..),binwidth = 0.5, fill = "#fb8072", color = "black") + geom_line(data = df, aes(x = x, y = y), color = "black", size=1) g3.3 0.0 0.1 0.2 0.3 0.4 0.5 −2.5 0.0 2.5 5.0 (S.F.Ratio − mean(S.F.Ratio))/sd(S.F.Ratio) de ns ity 13/57 library(purrr) library(tidyr) college %>% keep(is.numeric) %>% gather() %>% ggplot(aes(value)) + facet_wrap(~ key, scales = "free") + geom_histogram(col="black", fill="#fb8072", alpha = 0.7) + labs(y="Frequência") 14/57 PhD S.F.Ratio Top10perc Undergrad Grad.Rate Other.Exp Outstate P.Undergrad Accept Apps Enroll 
Expend 0 25 50 75 100 10 20 30 40 0 25 50 75 100 0 10 20 30 40 25 50 75 100 125 5.0 7.5 10.0 12.5 5 10 15 20 0 25 50 75 0 10 20 0 10 20 30 40 50 0 2 4 6 0 10 20 30 40 50 60 0 50 100 150 200 0 25 50 75 0 100 200 300 0 100 200 0 20 40 60 0 25 50 75 0 100 200 300 0 25 50 75 100 0 40 80 120 0 100 200 300 0 20 40 60 0 20 40 60 80 value F re q u ê n ci a 15/57 g4 <- ggplot(college, aes(x = "", y = S.F.Ratio)) + geom_boxplot() g4 10 20 30 40 x S. F. R at io 16/57 g4.1 <- ggplot(college, aes(x = "", y = S.F.Ratio)) + geom_boxplot(fill="#80b1d3",alpha=1) + coord_flip() + labs(title="Boxplot para a Relação aluno/docente") + labs(x="", y="Razão") g4.1 10 20 30 40 Razão Boxplot para a Relação aluno/docente 17/57 g5 <- ggplot(college, aes(x = Private, y = S.F.Ratio)) + geom_boxplot(fill=c("#fb8072","#80b1d3"),alpha=1) + geom_hline(yintercept = median(S.F.Ratio), colour="black", linetype = "dotted") g5 g5.1 <- g5 + theme_bw() g5.1 10 20 30 40 No Yes Private S. F.R at io g5 10 20 30 40 No Yes Private S. 
F.R at io g5.1 18/57 g6 <- ggplot(college, aes(x ="", y = S.F.Ratio)) + geom_boxplot(fill=c("#fb8072","#80b1d3"),alpha=1) + facet_wrap(~ Private, scales="free_y") + labs(x="", y="Razão") g6 No Yes 10 20 30 40 10 15 20 25 R az ão 19/57 library(purrr) library(tidyr) college %>% keep(is.numeric) %>% gather() %>% ggplot(aes(x="", y=value)) + facet_wrap(~ key, scales = "free") + geom_boxplot(col="black", fill="#80b1d3", alpha = 0.7) + labs(y="Frequência") 20/57 PhD S.F.Ratio Top10perc Undergrad Grad.Rate Other.Exp Outstate P.Undergrad Accept Apps Enroll Expend 10 20 30 40 50 0 25 50 75 0 10 20 30 40 0 2 4 6 5 10 15 20 0 25 50 75 100 0 10 20 30 40 50 5.0 7.5 10.0 12.5 10 20 30 40 0 10 20 30 60 90 120 25 50 75 100 21/57 g7 <- ggplot(college,aes(x = Apps, y = Accept)) + geom_point(size = 2) + geom_smooth(method="loess", se=T) + ggtitle('Número de inscrições por número de candidatos aceitos') g7 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Número de inscrições por número de candidatos aceitos 22/57 g8 <- ggplot(college, aes(x = Apps, y = Accept, colour = Private)) + geom_point(size = 2, alpha = 0.7) + scale_color_manual(values = c("Yes"="#80b1d3","No"="#fb8072")) + ggtitle('Número de inscrições por número de candidatos aceitos') g8 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Private No Yes Número de inscrições por número de candidatos aceitos 23/57 g8 + geom_smooth(method="loess", se=F) 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Private No Yes Número de inscrições por número de candidatos aceitos 24/57 g8 + coord_flip() 0 10 20 30 40 50 0 10 20 Accept Ap ps Private No Yes Número de inscrições por número de candidatos aceitos 25/57 g8 + scale_x_reverse() + scale_y_reverse() 0 10 20 01020304050 Apps Ac ce pt Private No Yes Número de inscrições por número de candidatos aceitos 26/57 college_n <- college[college$Apps > 20 & college$Private == "Yes", ] g8.1 <- g8 + geom_text(aes(label=row.names(college_n)), size=4, data=college_n) + theme(legend.position = "None") g8.1 Boston University 0 10 
20 0 10 20 30 40 50 Apps Ac ce pt Número de inscrições por número de candidatos aceitos 27/57 college_n <- college[college$Apps > 20 & college$Private == "Yes", ] g8.2 <- g8 + geom_label(aes(label=row.names(college_n)), size=4, data=college_n, alpha=0.25) + theme(legend.position = "None") g8.2 Boston University 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Número de inscrições por número de candidatos aceitos 28/57 library(ggalt) college_n <- college[college$Apps > 21, ] g8.3 <- g8 + geom_encircle(aes(x = Apps, y = Accept), data=college_n, color="black", size=1, expand=0.05) + theme(legend.position = "None")+ xlim(c(0,50)) + ylim(c(0,28)) g8.3 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Número de inscrições por número de candidatos aceitos 29/57 g9 <- ggplot(college, aes(x = Apps, y = Accept, colour = Private)) + geom_point(size = 1.5) + scale_color_manual(values = c("Yes"="#80b1d3","No"="#fb8072")) + facet_wrap(~ Private) + ggtitle('Número de inscrições por número de candidatos aceitos') g9 No Yes 0 10 20 30 40 50 0 10 20 30 40 50 0 10 20 Apps Ac ce pt Private No Yes Número de inscrições por número de candidatos aceitos 30/57 g10 <- ggplot(college, aes(x = Apps, y = Accept)) + scale_color_manual(values = c("Yes"="#80b1d3","No"="#fb8072")) + geom_point(data=college, aes(colour = Private, size=Expend)) g10 0 10 20 0 10 20 30 40 50 Apps Ac ce pt Private No Yes Expend 10 20 30 40 50 31/57 library(ggExtra) g11 <- ggplot(college, aes(x = Apps, y = Accept)) + geom_point(size=2,alpha=0.1) ## ggMarginal(g11, type = "boxplot", fill="transparent") Boxplot ggMarginal(g11, type = "histogram", fill="transparent") ## Histograma 0 10 20 0 10 20 30 40 50 Apps Ac ce pt 32/57 library(ggcorrplot) corr <- round(cor(college[2:13]), 1) ## Matriz de correlação ggcorrplot(corr, hc.order = TRUE, type = "upper", lab = TRUE, lab_size = 3, method="circle",colors = c("#fb8072","white","#80b1d3")) 0.2 0.3 0.3 0.6 0.4 0.4 0.5 0.3 0.5 0.6 0.4 0.4 0.4 0.7 0.7 −0.2 0 −0.3 −0.3 −0.4 −0.3 −0.3 −0.3 −0.1 −0.6 
−0.4 −0.6 0.1 0.1 0.3 0.4 0.1 0.3 0.3 −0.2 0.1 0.1 0.2 0.4 0 0.2 0.1 −0.2 0.2 0.9 0 0.1 0.3 −0.2 0.2 0.1 −0.1 0.2 0.8 0.9 −0.1 0.1 0.3 −0.2 0.1 0 0 0.3 0.8 0.8 0.9 Other.Exp PhD Outstate Top10perc Expend P.Undergrad S.F.Ratio Apps Accept Enroll Undergrad Gr ad .R ate Ot he r.E xp Ph D Ou tst ate To p1 0p er c Ex pe nd P. Un de rg rad S. F.R ati o Ap ps Ac ce pt En ro ll −1.0 −0.5 0.0 0.5 1.0 Corr 33/57 collegeNew<-college[1:40,] collegeNew$Uname <- rownames(collegeNew) collegeNew$AppsN <- round((collegeNew$Apps - mean(collegeNew$Apps))/sd(collegeNew$Apps), 2) collegeNew$AppsTipo <- ifelse(collegeNew$AppsN < 0, "below", "above") collegeNew <- collegeNew[order(collegeNew$AppsN), ] collegeNew$Uname <- factor(collegeNew$Uname, levels = collegeNew$`Uname`) # Diverging Barcharts g12 <- ggplot(collegeNew, aes(x=Uname, y=AppsN, label=AppsN)) + geom_bar(stat='identity', aes(fill=AppsTipo), width=.5) + scale_fill_manual(name="", labels = c("Acima da Média", "Abaixo da Média"), values = c("above"="#80b1d3", "below"="#fb8072")) + labs(y="Número de Inscrições",x="") + ylim(c(-1, 5)) + facet_wrap(~ Private) + theme(legend.position="bottom", legend.box = "horizontal") + coord_flip() 34/57 g12 No Yes 0 2 4 0 2 4 Alaska Pacific University Barat College Albertus Magnus College Agnes Scott College Alverno College Albertson College Alderson−Broaddus College Aquinas College Baker University Averett College Augsburg College Antioch University Arkansas College (Lyon College) Augustana College Austin College Barry University Albright College Andrews University Allentown Coll. of St. 
Francis de Sales Anderson University Alma College Adrian College American International College Abilene Christian University Baldwin−Wallace College Alfred University Arkansas Tech University Augustana College IL Albion College Bard College Assumption College Adelphi University Barnard College Allegheny College Angelo State University Amherst College Baylor University Appalachian State University Auburn University−Main Campus Arizona State University Main campus Número de Inscrições Acima da Média Abaixo da Média 35/57 Dados Longitudinais Dados: Curvas de crescimento de porcos. Estes são dados longitudinais de um experimento fatorial. A variável resposta é o peso de cada porco, e a única variável preditora que usaremos aqui é “tempo”. data(dietox, package='geepack') dados_pig <- dietox str(dados_pig) 'data.frame': 861 obs. of 7 variables: $ Weight: num 26.5 27.6 36.5 40.3 49.1 ... $ Feed : num NA 5.2 17.6 28.5 45.2 ... $ Time : int 1 2 3 4 5 6 7 8 9 10 ... $ Pig : int 4601 4601 4601 4601 4601 4601 4601 4601 4601 4601 ... $ Evit : int 1 1 1 1 1 1 1 1 1 1 ... $ Cu : int 1 1 1 1 1 1 1 1 1 1 ... $ Litter: int 1 1 1 1 1 1 1 1 1 1 ... 
36/57 dados_pig[1:15,] Weight Feed Time Pig Evit Cu Litter 1 26.50000 NA 1 4601 1 1 1 2 27.59999 5.200005 2 4601 1 1 1 3 36.50000 17.600000 3 4601 1 1 1 4 40.29999 28.500000 4 4601 1 1 1 5 49.09998 45.200001 5 4601 1 1 1 6 55.39999 56.900002 6 4601 1 1 1 7 59.59998 71.700005 7 4601 1 1 1 8 67.00000 86.800001 8 4601 1 1 1 9 76.59998 104.900002 9 4601 1 1 1 10 86.50000 123.000000 10 4601 1 1 1 11 91.59998 140.900002 11 4601 1 1 1 12 98.59998 160.000000 12 4601 1 1 1 13 27.00000 NA 1 4643 1 1 2 14 31.79999 6.400002 2 4643 1 1 2 15 39.00000 21.500000 3 4643 1 1 2 length(unique(dados_pig$Pig)) [1] 72 37/57 g13 <- ggplot(data=dados_pig, aes(x=Time, y=Weight, group=Pig)) + geom_line() + geom_point(size=2) + labs(x="Tempo", y="Peso") + theme_bw() g13 30 60 90 120 2.5 5.0 7.5 10.0 12.5 Tempo Pe so 38/57 data_ps <- rbind(dados_pig[dados_pig$Pig=="4641",], dados_pig[dados_pig$Pig=="4643",], dados_pig[dados_pig$Pig=="4760",]) colorind<-c(rep("#80b1d3",12),rep("black",12),rep("#fb8072",12)) g14 <- ggplot(data=dados_pig, aes(x=Time, y=Weight, group=Pig)) + geom_line(colour="gray80") + geom_point(size=2, colour="gray80") + geom_line(data=data_ps, aes(x=Time, y=Weight, group=Pig), colour=colorind) + geom_point(data=data_ps, aes(x=Time, y=Weight, group=Pig), size=2, colour=colorind) + geom_text(aes(12.5, dados_pig[dados_pig$Pig=="4760" & dados_pig$Time==12,1], label="4760"), colour="#fb8072", size=4) + geom_text(aes(12.5, dados_pig[dados_pig$Pig=="4641" & dados_pig$Time==12,1], label="4641"), colour="#80b1d3", size= 4) + geom_text(aes(12.5, dados_pig[dados_pig$Pig=="4643" & dados_pig$Time==12,1], label="4643"), colour="black", size = 4) + labs(x="Tempo", y="Peso") + xlim(c(1, 13)) + theme_bw() 39/57 g14 
4760 4641 4643 30 60 90 120 5 10 Tempo Peso 40/57 g15 <- ggplot(data=dados_pig, aes(x=Time, y=Weight, group=Pig)) + geom_line(colour="gray80") + geom_point(size=2, colour="gray80") + facet_wrap(~ Evit) + labs(x="Tempo", y="Peso") + theme_bw() g15 1 2 3 2.5 5.0 7.5 10.0 12.5 2.5 5.0 7.5 10.0 12.5 2.5 5.0 7.5 10.0 12.5 30 60 90 120 Tempo Peso 41/57 Séries temporais: US economic time series Este conjunto de dados foi produzido a partir de dados de séries temporais econômicas dos EUA, disponíveis em http://research.stlouisfed.org/fred2. Dados com 574 linhas e 6 variáveis: date: Mês de coleta de dados. psavert: Taxa de poupança pessoal. pce: Gastos com consumo pessoal, em bilhões de dólares. unemploy: Número de desempregados em milhares. uempmed: Duração mediana do desemprego, em semanas. pop: População total, em milhares. 42/57 http://research.stlouisfed.org/fred2 head(economics) # A tibble: 6 x 6 date pce pop psavert uempmed unemploy <date> <dbl> <int> <dbl> <dbl> <int> 1 1967-07-01 507.4 198712 12.5 4.5 2944 2 1967-08-01 510.5 198911 12.5 4.7 2945 3 1967-09-01 516.3 199113 11.7 4.6 2958 4 1967-10-01 512.9 199311 12.5 4.9 3143 5 1967-11-01 518.1 199498 12.5 4.7 3066 6 1967-12-01 525.8 199657 12.1 4.8 3018 str(economics) Classes 'tbl_df', 'tbl' and 'data.frame': 574 obs. of 6 variables: $ date : Date, format: "1967-07-01" "1967-08-01" ... $ pce : num 507 510 516 513 518 ... $ pop : int 198712 198911 199113 199311 199498 199657 199808 199920 200056 200208 ... $ psavert : num 12.5 12.5 11.7 12.5 12.5 12.1 11.7 12.2 11.6 12.2 ... $ uempmed : num 4.5 4.7 4.6 4.9 4.7 4.8 5.1 4.5 4.1 4.6 ... $ unemploy: int 2944 2945 2958 3143 3066 3018 2878 3001 2877 2709 ... 
43/57 library(dplyr) series <- economics %>% select(date, psavert, uempmed) %>% gather(key = "variable", value = "value", -date) head(series,5) # A tibble: 5 x 3 date variable value <date> <chr> <dbl> 1 1967-07-01 psavert 12.5 2 1967-08-01 psavert 12.5 3 1967-09-01 psavert 11.7 4 1967-10-01 psavert 12.5 5 1967-11-01 psavert 12.5 str(series) Classes 'tbl_df', 'tbl' and 'data.frame': 1148 obs. of 3 variables: $ date : Date, format: "1967-07-01" "1967-08-01" ... $ variable: chr "psavert" "psavert" "psavert" "psavert" ... $ value : num 12.5 12.5 11.7 12.5 12.5 12.1 11.7 12.2 11.6 12.2 ... 44/57 g16 <- ggplot(series, aes(x = date, y = value)) + geom_line(aes(color = variable), size = 1) + scale_color_manual(values = c("#80b1d3", "#fb8072")) + labs(x="", y="") + theme(legend.position="bottom", legend.box = "horizontal") + guides(color=guide_legend("séries")) g16 5 10 15 20 25 1970 1980 1990 2000 2010 séries psavert uempmed 45/57 g17 <- ggplot(series, aes(x = date, y = value)) + geom_area(aes(color = variable, fill = variable), alpha = 0.5, position = position_dodge(0.8)) + geom_line(aes(color = variable), size = 1) + scale_color_manual(values = c("#80b1d3", "#fb8072")) + scale_fill_manual(values = c("#80b1d3", "#fb8072")) + labs(x="", y="") + theme(legend.position="bottom", legend.box = "horizontal") + guides(color=guide_legend("séries")) g17 0 5 10 15 20 25 1970 1980 1990 2000 2010 séries psavert uempmed variable psavert uempmed 46/57 Mapas library(ggmap) library(maps) library(mapdata) world <- map_data("world") dim(world) [1] 99338 6 head(world) long lat group order region subregion 1 -69.89912 12.45200 1 1 Aruba <NA> 2 -69.89571 12.42300 1 2 Aruba <NA> 3 -69.94219 12.43853 1 3 Aruba <NA> 4 -70.00415 12.50049 1 4 Aruba <NA> 5 -70.06612 12.54697 1 5 Aruba <NA> 6 -70.05088 12.59707 1 6 Aruba <NA> 47/57 g18 <- ggplot() + geom_polygon(data = world, aes(x=long, y = lat, group = group)) + coord_fixed(1.3) + labs(x="longitude", y="latitude") g18 −50 0 50 −100 0 100 200 
longitude la tit ud e 48/57 g19 <- ggplot() + geom_polygon(data = world, aes(x=long, y = lat, group = group), fill = NA, color = "#80b1d3") + coord_fixed(1.3) + labs(x="longitude", y="latitude") g19 −50 0 50 −100 0 100 200 longitude la tit ud e 49/57 g20 <- ggplot() + geom_polygon(data = world, aes(x=long, y = lat, group = group), fill = "#fb8072", color = "gray90", alpha=0.7) + coord_fixed(1.3) + labs(x="longitude", y="latitude") g20 −50 0 50 −100 0 100 200 longitude la tit ud e 50/57 usa <- map_data("usa") dim(usa) [1] 7243 6 head(usa) long lat group order region subregion 1 -101.4078 29.74224 1 1 main <NA> 2 -101.3906 29.74224 1 2 main <NA> 3 -101.3620 29.65056 1 3 main <NA> 4 -101.3505 29.63911 1 4 main <NA> 5 -101.3219 29.63338 1 5 main <NA> 6 -101.3047 29.64484 1 6 main <NA> states <- map_data("state") dim(states) [1] 15537 6 51/57 g21 <- ggplot(data = states) + geom_polygon(aes(x = long, y = lat, fill = region, group = group), color = "white") + coord_fixed(1.3) + labs(x="longitude", y="latitude") + guides(fill=FALSE) # Tirar a legenda g21 25 30 35 40 45 50 −120 −100 −80 longitude la tit ud e 52/57 arrests <- USArrests names(arrests) <- tolower(names(arrests)) arrests$region <- tolower(rownames(USArrests)) data_usa <- merge(states, arrests, sort = FALSE, by = "region") data_usa[1:3,1:5] region long lat group order 1 alabama -87.46201 30.38968 1 1 2 alabama -87.48493 30.37249 1 2 3 alabama -87.95475 30.24644 1 13 data_usa1 <- data_usa[order(data_usa$order), ] data_usa1[1:3,1:5] region long lat group order 1 alabama -87.46201 30.38968 1 1 2 alabama -87.48493 30.37249 1 2 6 alabama -87.52503 30.37249 1 3 names(data_usa1)<-c("região","longitude","latitude","grupo","ordem", "subregião","assassinato","assalto" ,"urbanpop" ,"estupro") 53/57 g22 <- ggplot(data_usa1, aes(longitude, latitude)) + geom_polygon(aes(group = grupo, fill = assalto)) + coord_map("albers", at0 = 45.5, lat1 = 29.5) ## http://ggplot2.tidyverse.org/reference/coord_map.html - coord_map g22 25 30 
35 40 45 50 −120 −100 −80 longitude latitude 100 200 300 assalto 54/57 brasil <- map_data("world", region="Brazil") # mapa do brasil names(brasil)<-c("longitude","latitude","grupo","ordem","região","subregião") g23 <- ggplot(data = brasil) + geom_polygon( aes(x=longitude, y=latitude, group=grupo), color = "yellow", fill="green4") + coord_map("mercator") ## Mercator projection g23 −30 −20 −10 0 −70 −60 −50 −40 longitude latitude 55/57 brasil <- map_data("world", region="Brazil") # mapa do brasil names(brasil)<-c("longitude","latitude","grupo","ordem","região","subregião") g24 <- ggplot(data = brasil) + geom_polygon( aes(x=longitude, y=latitude, group=grupo, fill=subregião)) + coord_map("mercator") g24 −30 −20 −10 0 −70 −60 −50 −40 longitude latitude subregião 14 5 6 7 8 Ihla Mexiana Ilha Caviana Ilha de Maraca Ilha de Marajo Ilha de Santa Catarina Ilha de Sao Francisco Ilha de Sao Sebastiao Ilha Grande Ilha Grande de Gurupa Ilha Janaucu Ilha Queimada NA 56/57 http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html Mapas Brasil: https://rpubs.com/gomes555/mapas 57/57 http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html https://rpubs.com/gomes555/mapas
Compartilhar