This document downloads and analyses data providad by “Instituto de la Salud Carlos III” associated with the following data file https://cnecovid.isciii.es/covid19/resources/casos_hosp_uci_def_sexo_edad_provres.csv.
rm(list=ls())
setwd("C:/Users/rominsol/Mi unidad/uc3m/IntroductiontoDataScience/MaterialCurso2022_23/Class14")
print('Download data file https://cnecovid.isciii.es/covid19/resources/casos_hosp_uci_def_sexo_edad_provres.csv')
## [1] "Download data file https://cnecovid.isciii.es/covid19/resources/casos_hosp_uci_def_sexo_edad_provres.csv"
print(paste(c('and storage it in the folder ', getwd())))
## [1] "and storage it in the folder "
## [2] "C:/Users/rominsol/Mi unidad/uc3m/IntroductiontoDataScience/MaterialCurso2022_23/Class14"
#input('con el nombre dataMOMO.csv');
='https://cnecovid.isciii.es/covid19/resources/casos_hosp_uci_def_sexo_edad_provres.csv';
url = 'dataISCIII.csv';
filename if(!file.exists(filename)){
download.file(url, filename)
}=installed.packages()[,"Package"]
aux
#
if(!("lubridate"%in%installed.packages()[,"Package"])){
install.packages("lubridate", repos="http://cran.rstudio.com/", dependencies=TRUE)
}library("lubridate")
## Loading required package: timechange
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load current ISCIII data set
= read.csv(filename,
data header=TRUE,
sep=",",
dec=".",
stringsAsFactors=TRUE,
encoding="UTF-8")
Once the data is available in the environment, we take a look to the structure of the information content. Note that some of the variables are storaged as factor, including the date. The rest of variables correspond to integers:
head(data,15)
## provincia_iso sexo grupo_edad fecha num_casos num_hosp num_uci num_def
## 1 A H 0-9 2020-01-01 0 0 0 0
## 2 A H 10-19 2020-01-01 0 0 0 0
## 3 A H 20-29 2020-01-01 0 0 0 0
## 4 A H 30-39 2020-01-01 0 0 0 0
## 5 A H 40-49 2020-01-01 0 0 0 0
## 6 A H 50-59 2020-01-01 0 0 0 0
## 7 A H 60-69 2020-01-01 0 0 0 0
## 8 A H 70-79 2020-01-01 0 0 0 0
## 9 A H 80+ 2020-01-01 0 0 0 0
## 10 A H NC 2020-01-01 0 0 0 0
## 11 A M 0-9 2020-01-01 0 0 0 0
## 12 A M 10-19 2020-01-01 0 0 0 0
## 13 A M 20-29 2020-01-01 0 0 0 0
## 14 A M 30-39 2020-01-01 0 0 0 0
## 15 A M 40-49 2020-01-01 0 0 0 0
Before starting the exploratory data analysis, we proceed to generate three additional auxiliary variables associated with date, the day of the year and the year information for each record, respectively:
# Create dates
$date=as.Date(data$fecha)
data# Create the days within() the year as a factor and include it in the data frame
$yearday=as.factor(yday(data$date))
data# Create the years within the year as a factor and include it in the data frame
$year=as.factor(year(data$date))
datastr(data)
## 'data.frame': 1299030 obs. of 11 variables:
## $ provincia_iso: Factor w/ 52 levels "A","AB","AL",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sexo : Factor w/ 3 levels "H","M","NC": 1 1 1 1 1 1 1 1 1 1 ...
## $ grupo_edad : Factor w/ 10 levels "0-9","10-19",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ fecha : Factor w/ 817 levels "2020-01-01","2020-01-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ num_casos : int 0 0 0 0 0 0 0 0 0 0 ...
## $ num_hosp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ num_uci : int 0 0 0 0 0 0 0 0 0 0 ...
## $ num_def : int 0 0 0 0 0 0 0 0 0 0 ...
## $ date : Date, format: "2020-01-01" "2020-01-01" ...
## $ yearday : Factor w/ 366 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : Factor w/ 3 levels "2020","2021",..: 1 1 1 1 1 1 1 1 1 1 ...
library("ggplot2")
We initially represent the temporal daily evolution of deaths during pandemic. Since the initial information is distributed by different age groups and sex categories, we need to aggregate the numbers for all possible values of these two categorical variables:
=aggregate(cbind(num_casos=data$num_casos,
dailydatspainnum_hosp=data$num_hosp,
num_uci=data$num_uci,
num_def=data$num_def
), by=list(fecha=data$fecha,
yearday=data$yearday,
year=data$year),
FUN=sum)
str(dailydatspain)
## 'data.frame': 817 obs. of 7 variables:
## $ fecha : Factor w/ 817 levels "2020-01-01","2020-01-02",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ yearday : Factor w/ 366 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ year : Factor w/ 3 levels "2020","2021",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ num_casos: int 0 0 0 0 0 0 0 0 1 1 ...
## $ num_hosp : int 8 75 8 6 7 5 6 4 8 9 ...
## $ num_uci : int 0 1 1 0 0 0 1 0 0 0 ...
## $ num_def : int 0 0 0 0 0 0 0 0 0 0 ...
=ggplot(data=dailydatspain)+
faes(x=fecha)+
geom_point(aes(y=num_def),shape=23,colour="black",fill="blue")+
xlab("Date")+
ylab("COVID deaths")+
scale_x_discrete(
breaks=dailydatspain$fecha[seq(1, length(dailydatspain$fecha), by=15)],
labels = dailydatspain$fecha[seq(1, length(dailydatspain$fecha), by=15)]
+
)theme(axis.text.x = element_text(angle = 90,hjust=1),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/ISCIII_Spain.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
Now we make the same plot but collapsed within one year and using different colors depending on the year of the record:
=as.character(cumsum(c(31,31,28,30,31,30,31,31,30,31,30,32)))
breaks= c("Jan",
labels "Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec")
=ggplot(data=dailydatspain)+
faes(x=yearday,colour=year,fill=year)+
geom_point(aes(y=num_def))+
xlab("Date")+
ylab("COVID deaths")+
scale_x_discrete(
breaks=breaks,
labels =labels
+
)theme(axis.text.x = element_text(hjust=1))+
ggtitle(paste("Datos procedentes del ISCII (",Sys.Date(),")"))+
theme(plot.title = element_text(size=15,hjust = 0.5),text = element_text(size = 15),axis.text = element_text(angle = 45))
ggsave(gsub(" ",
"",
paste("RGraphs/ISCIII_Spain2.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
For making two-way or contingency tables, it is convenient to organize the number of deaths by province and age group:
=aggregate(list(num_def=data$num_def),
datprovinciasby=list(provincias=data$provincia_iso,
grupo_edad=data$grupo_edad),
FUN=sum)
str(datprovincias)
## 'data.frame': 520 obs. of 3 variables:
## $ provincias: Factor w/ 52 levels "A","AB","AL",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ grupo_edad: Factor w/ 10 levels "0-9","10-19",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ num_def : int 0 1 1 0 2 0 1 0 0 1 ...
With this new information available it is possible to construct the following table:
=matrix(datprovincias$num_def, nrow = length(levels(data$grupo_edad)), byrow = TRUE)
ctablerownames(ctable)=levels(data$grupo_edad)
colnames(ctable)=levels(data$provincia_iso)
=cbind(ctable,apply(ctable,1,sum))
ctable=rbind(ctable,apply(ctable,2,sum))
ctablerownames(ctable)[nrow(ctable)]="All"
colnames(ctable)[ncol(ctable)]="SP"
print(t(ctable))
## 0-9 10-19 20-29 30-39 40-49 50-59 60-69 70-79 80+ NC All
## A 0 1 3 10 58 160 407 869 1956 0 3464
## AB 1 0 1 1 4 37 86 219 634 0 983
## AL 1 0 7 14 46 80 178 297 526 0 1149
## AV 0 0 1 0 3 17 49 89 416 0 575
## B 2 3 12 27 134 456 1324 3062 9545 0 14565
## BA 0 0 2 5 10 56 130 244 741 1 1189
## BI 1 0 2 14 30 80 246 566 2115 0 3054
## BU 0 0 0 1 7 25 72 182 799 0 1086
## C 0 0 0 3 10 43 96 302 924 0 1378
## CA 1 1 7 14 26 126 274 464 851 0 1764
## CC 1 0 1 0 8 31 94 190 746 0 1071
## CE 0 0 2 0 4 9 42 34 55 0 146
## CO 0 0 1 6 16 49 133 286 829 0 1320
## CR 0 1 5 8 26 60 196 427 1177 0 1900
## CS 0 0 1 1 13 41 105 241 651 0 1053
## CU 0 0 0 1 4 27 58 123 497 0 710
## GC 1 0 1 7 21 50 127 175 317 0 699
## GI 1 0 3 6 22 86 191 371 1177 0 1857
## GR 0 1 10 18 34 105 257 513 1139 0 2077
## GU 0 0 0 1 11 20 54 133 534 0 753
## H 0 0 0 3 11 27 57 128 259 0 485
## HU 0 0 1 4 3 20 55 120 536 4 743
## J 0 2 3 3 23 78 159 286 768 0 1322
## L 0 0 1 3 9 36 84 153 618 0 904
## LE 0 0 1 2 4 28 77 191 808 0 1111
## LO 0 1 0 4 15 24 61 180 627 0 912
## LU 0 0 0 0 1 5 26 42 254 0 328
## M 5 5 21 49 243 760 2045 4527 11278 0 18933
## MA 2 3 16 18 35 150 328 628 1172 0 2352
## ML 0 0 0 2 4 16 21 36 75 0 154
## MU 0 0 1 7 39 105 258 431 1375 0 2216
## NC 0 0 0 2 5 15 22 60 142 184 430
## O 0 0 1 5 17 73 220 485 1802 0 2603
## OR 0 0 0 1 0 14 20 93 417 0 545
## P 0 0 0 1 8 18 66 128 444 0 665
## PM 0 3 2 5 19 53 157 331 807 0 1377
## PO 0 0 1 1 3 38 71 182 651 0 947
## S 0 0 0 1 6 23 53 136 570 0 789
## SA 1 0 1 2 6 24 91 184 879 1 1189
## SE 1 1 10 25 49 203 441 720 1433 0 2883
## SG 0 0 0 0 5 16 51 101 490 0 663
## SO 0 0 0 0 4 12 33 70 370 0 489
## SS 1 0 1 8 17 76 186 413 1349 0 2051
## T 0 2 2 6 14 46 133 311 960 0 1474
## TE 1 1 1 0 6 14 38 71 410 1 543
## TF 2 0 5 8 17 73 122 250 460 0 937
## TO 1 0 4 7 25 96 308 570 1744 0 2755
## V 0 1 4 20 43 174 507 1023 2842 0 4614
## VA 0 1 0 6 7 51 149 353 1316 0 1883
## VI 0 0 3 1 10 23 75 214 585 0 911
## Z 1 1 1 10 26 107 294 603 2305 16 3364
## ZA 0 0 0 1 4 12 57 106 480 0 660
## SP 24 28 139 342 1165 3968 10384 21913 63855 207 102025
If we are just interested in number of deaths per age group regardless of the province:
library("knitr")
=prop.table(ctable[1:(nrow(ctable)-1),ncol(ctable)])
prop=cbind(100*prop,100*cumsum(prop),ctable[1:(nrow(ctable)-1),ncol(ctable)],cumsum(ctable[1:(nrow(ctable)-1),ncol(ctable)]))
propcolnames(prop)=c("% deaths","% Cum deaths","Deaths","Cum deaths")
kable(prop,caption="Proportion of deaths by age group")
% deaths | % Cum deaths | Deaths | Cum deaths | |
---|---|---|---|---|
0-9 | 0.0235236 | 0.0235236 | 24 | 24 |
10-19 | 0.0274443 | 0.0509679 | 28 | 52 |
20-29 | 0.1362411 | 0.1872090 | 139 | 191 |
30-39 | 0.3352120 | 0.5224210 | 342 | 533 |
40-49 | 1.1418770 | 1.6642980 | 1165 | 1698 |
50-59 | 3.8892428 | 5.5535408 | 3968 | 5666 |
60-69 | 10.1778976 | 15.7314384 | 10384 | 16050 |
70-79 | 21.4780691 | 37.2095075 | 21913 | 37963 |
80+ | 62.5876011 | 99.7971086 | 63855 | 101818 |
NC | 0.2028914 | 100.0000000 | 207 | 102025 |
Next we want to show the information storage in the “datprovincias” data frame using a box plot. Note that we have to use a logarithmic scale in the vertical axis:
=ggplot(data=datprovincias)+
faes(x=grupo_edad,colour=grupo_edad)+
geom_boxplot(aes(y=num_def))+
geom_jitter(aes(y=num_def))+
xlab("Age group")+
ylab("COVID deaths by province")+
scale_y_log10()+
ggtitle(paste("Official COVID deaths by province and age group (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20),legend.position = "none")
ggsave(gsub(" ",
"",
paste("RGraphs/ISCIII_Boxplot_Age_Province.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
We could present the same data but acumulated by age group:
If we plot the number of deaths by province and splitting the information by age group:
=ggplot(data=datprovincias)+
faes(x=provincias,fill=grupo_edad)+
geom_bar(aes(y=num_def),stat = "identity")+
xlab("Province")+
ylab("Deaths by COVID")+
labs(fill="Age group")+
ggtitle(paste("Bar graph of COVID deaths by province and age group (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5,size=20),text = element_text(size = 12),axis.text.x = element_text(angle=90))
ggsave(gsub(" ",
"",
paste("RGraphs/ISCIII_Barplot_Age_Province.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
If we show the bars in relative terms we get:
=ggplot(data=datprovincias)+
faes(x=provincias,fill=grupo_edad)+
geom_bar(aes(y=num_def),stat = "identity",position="fill")+
xlab("Province")+
ylab("Deaths by COVID")+
labs(fill="Age group")+
ggtitle(paste("Bar graph of COVID deaths by province and age group (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5,size=20),text = element_text(size = 12),axis.text.x = element_text(angle=90))
ggsave(gsub(" ",
"",
paste("RGraphs/ISCIII_Bar2plot_Age_Province.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
MoMo is a system for monitoring daily mortality from all causes in Spain. Its objective is to identify unusual patterns of mortality and estimate the impact on the mortality of the population of any important event to guide action in Public Health.
It is managed by the CNE and uses data from various public administrations.
MoMo feeds fundamentally on data on daily deaths from all causes from the National Institute of Statistics (INE), up to the year in which they are available, and on civil records that are currently computerized, from that year to the present, and which are received through the Ministry of Justice. The coverage rate by Autonomous Community is provided in the “Notification” tab.
MoMo data might have some delay on reporting date. For example, deaths occurring on day 1 are reported on successive days (2, 3, …). Typically, and in normal periods, practically all the deaths that occurred on day 1 are already available on day 7. Specifically, the deaths observed in recent days, such as those shown highlighted in the blue window, the right of the image, may undergo significant changes in the following days.
Data is available in CSV format from https://momo.isciii.es/public/momo/dashboard/momo_dashboard.html#datos with the following information:
MoMo data file is storaged in the following address https://momo.isciii.es/public/momo/dashboard/momo_dashboard.html#datos. It has around 400Mb, before downloading it from the web we check if it is available in our working directory. Once it is in our folder, load it:
print('Download file MOMO https://momo.isciii.es/public/momo/dashboard/momo_dashboard.html#datos')
## [1] "Download file MOMO https://momo.isciii.es/public/momo/dashboard/momo_dashboard.html#datos"
print(paste(c('and storage it in the folder ', getwd())))
## [1] "and storage it in the folder "
## [2] "C:/Users/rominsol/Mi unidad/uc3m/IntroductiontoDataScience/MaterialCurso2022_23/Class14"
#input('con el nombre dataMOMO.csv');
='https://momo.isciii.es/public/momo/data';
url = 'dataMOMO.csv';
filename if(!file.exists(filename)){
download.file(url, filename, timeout = 6000)
}
# Load current MOMO data set
= read.csv(filename,
dataMOMO header=TRUE,
sep=",",
dec=".",
stringsAsFactors=TRUE,
encoding="UTF-8")
We are going to generate additional variables to storage dates as proper dates, the day of the year of each record, the year and even the month. We also drop those levels of factors if we do not have data associated with the corresponding level:
# Create dates
$date_defuncion=as.Date(dataMOMO$fecha_defuncion)
dataMOMO# Create the days within the year as a factor and include it in the data frame
$yearday=as.factor(yday(dataMOMO$date_defuncion))
dataMOMO# Create the years within the year as a factor and include it in the data frame
$year=as.factor(year(dataMOMO$date_defuncion))
dataMOMO# Create the months within the year as a factor and include it in the data frame
$month=as.factor(month(dataMOMO$date_defuncion))
dataMOMO
#dataMOMO=dataMOMO[-which(dataMOMO$year%in%c(2015,2016,2017,2018)),]
$defunciones_observadas=as.integer(dataMOMO$defunciones_observadas);
dataMOMO
#dataMOMO=na.omit(dataMOMO)
$year=droplevels(dataMOMO$year) dataMOMO
We initially draw from the global dataset, with national information for both genders and all age groups for each day.
=dataMOMO[dataMOMO$ambito=="nacional" &
dataMOMO_SP$nombre_sexo=="todos" &
dataMOMO$cod_gedad=="all",
dataMOMOc("fecha_defuncion",
"yearday",
"year",
"month",
"defunciones_observadas",
"defunciones_estimadas_base",
"defunciones_estimadas_base_q99",
"defunciones_estimadas_base_q01")]
=na.omit(dataMOMO_SP)
dataMOMO_SP
=ggplot(data=dataMOMO_SP)+
faes(x=yearday,y=defunciones_observadas,colour=year)+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_point(size=5)+
xlab("Day within year")+
ylab("Daily deaths for all causes")+
scale_x_discrete(
breaks=breaks,
labels = labels
+
)theme(axis.text.x = element_text(hjust=1),text = element_text(size = 20))+
ggtitle(paste("Data taken from MoMo system (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_Spain.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
If we plot the same information using box plot, we get the following graph:
# Plot for total Fallecidos in an annual scale including 99% confidence bands
=ggplot(data=dataMOMO_SP)+
faes(x=month,y=defunciones_observadas,colour=year)+
#geom_jitter(aes(shape=year))+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_boxplot()+
xlab("Months")+
ylab("Daily deaths for all causes")+
scale_x_discrete(
labels = labels
+
)ggtitle(paste("Data taken from MoMo (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_Boxplot_Spain.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
Next we aggregate deaths by month and plot it using a bar graph:
=aggregate(list(num_def=dataMOMO_SP$defunciones_observadas),
dataMOMOmesesSPby=list(year=dataMOMO_SP$year,
month=dataMOMO_SP$month),
FUN=sum)
#dataMOMOmesesSP=as.data.frame(lapply(dataMOMOmesesSP,function(x)rep(x, as.integer(dataMOMOmesesSP$num_def))))
#dataMOMOmesesSP$num_def=NULL
# Plot for total Fallecidos in an annual scale including 99% confidence bands
=ggplot(data=dataMOMOmesesSP)+
faes(x=month,fill=year,y=num_def)+
#geom_jitter(aes(shape=year))+
geom_bar(stat="identity",position="dodge")+
geom_text(aes(label=num_def),position = position_dodge(width = 1),angle=90,hjust=1,size=5)+
xlab("Months")+
ylab("Monthly deaths for all causes")+
scale_x_discrete(
labels = labels
+
)ggtitle(paste("Data taken from MoMo (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_Barplot_Spain.png"),
fixed = TRUE),
width=30,
height=20,
units = "cm")
print(f)
If we take into consideration the age group we can get the following information:
=dataMOMO[dataMOMO$ambito=="nacional" &
dataMOMO_SPAge$nombre_sexo=="todos",
dataMOMOc("fecha_defuncion",
"yearday",
"year",
"month",
"cod_gedad",
"defunciones_observadas",
"defunciones_estimadas_base",
"defunciones_estimadas_base_q99",
"defunciones_estimadas_base_q01")]
$cod_gedad=factor(dataMOMO_SPAge$cod_gedad,levels=c("0-14","15-44","45-64","65-74","75-84","+85","all" ))
dataMOMO_SPAge
=na.omit(dataMOMO_SPAge) dataMOMO_SPAge
The death evolution throughout different years can be easily displayed:
=ggplot(data=dataMOMO_SPAge)+
faes(x=yearday,y=defunciones_observadas,colour=year)+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_point(size=3)+
facet_grid(cod_gedad~.,scales="free_y")+
xlab("Months")+
ylab("Daily deaths by all causes")+
scale_x_discrete(
breaks=breaks,
labels =labels
+
)theme(axis.text.x = element_text(hjust=1))+
ggtitle(paste("Data taken from MoMo (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_Ages_Spain.png"),
fixed = TRUE),
width=30,
height=30,
units = "cm")
print(f)
Similarly, we group the data by month and display box plots for different years and age groups. This makes it easier to compare the monthly distributions of deaths between different years.
=ggplot(data=dataMOMO_SPAge)+
faes(x=month,y=defunciones_observadas,colour=year)+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_boxplot()+
facet_grid(cod_gedad~.,scales="free_y")+
xlab("Months")+
ylab("Daily deaths for all causes")+
scale_x_discrete(
labels =labels
+
)ggtitle(paste("Data taken from MoMo system (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_AgesBoxplot_Spain.png"),
fixed = TRUE),
width=30,
height=30,
units = "cm")
print(f)
Below we have the information about monthly deceased by agegroups and years:
=aggregate(list(num_def=dataMOMO_SPAge$defunciones_observadas),
dataMOMOmesesSPageby=list(year=dataMOMO_SPAge$year,
month=dataMOMO_SPAge$month,
cod_gedad=dataMOMO_SPAge$cod_gedad),
FUN=sum)
# dataMOMOmesesSPage=as.data.frame(lapply(dataMOMOmesesSPage,function(x)rep(x, as.integer(dataMOMOmesesSPage$num_def))))
# dataMOMOmesesSPage$num_def=NULL
# Plot for total Fallecidos in an annual scale including 99% confidence bands
=ggplot(data=dataMOMOmesesSPage)+
faes(x=month,fill=year,y=num_def)+
#geom_jitter(aes(shape=year))+
geom_bar(stat="identity",position="dodge")+
geom_text(aes(label=num_def),position = position_dodge(width = 1),angle=90,hjust=1,size=4 )+
facet_grid(cod_gedad~.,scales="free")+
xlab("Months")+
ylab("Monthly deaths for all causes")+
scale_x_discrete(
labels = labels
+ggtitle(paste("Data taken from MoMo system (",Sys.Date(),")"))+
)theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/MOMO_Barplot_SpainEDAD.png"),
fixed = TRUE),
width=30,
height=60,
units = "cm")
We are going to show the graphs made at the national level but by Autonomous Communities:
=dataMOMO[dataMOMO$ambito=="ccaa" &
dataMOMO_CCAAge$nombre_sexo=="todos",
dataMOMOc("fecha_defuncion",
"yearday",
"year",
"month",
"cod_gedad",
"nombre_ambito",
"defunciones_observadas",
"defunciones_estimadas_base",
"defunciones_estimadas_base_q99",
"defunciones_estimadas_base_q01")]
$cod_gedad=factor(dataMOMO_CCAAge$cod_gedad,levels=c("0-14","15-44","45-64","65-74","75-84","+85","all" ))
dataMOMO_CCAAge$nombre_ambito=droplevels(dataMOMO_CCAAge$nombre_ambito)
dataMOMO_CCAAge= levels(dataMOMO_CCAAge$nombre_ambito)
ccaas #ccaas = c("Andalucía","Aragón","Asturias, Principado de","Balears, Illes","Canarias","Cantabria","Castilla - La Mancha","Castilla y León","Cataluña","Ceuta","Comunitat Valenciana","Extremadura","Galicia","Madrid, Comunidad de","Melilla","Murcia, Región de","Navarra, Comunidad Foral de","País Vasco","Rioja, La")
= list(Andalucía=c("AL","CA","CO","GR","HU","J","MA","SE"),
provincias =c("HU","TE","Z"),
Aragón"Asturias, Principado de"=c("A"),
"Balears, Illes"=c("BA"),
Canarias=c("PM","TF"),
Cantabria=c("S"),
"Castilla - La Mancha"=c("AB","CR","CU","GU","TO"),
"Castilla y León"=c("AV","BU","LE","P","SA","SG","SO","VA","ZA"),
= c("B","GI","L","T"),
Cataluña Ceuta = c("CE"),
"Comunitat Valenciana"=c("A","CS","V"),
Extremadura = c("BA","CA"),
Galicia=c("C","LU","OR","PO"),
"Madrid, Comunidad de"= c("M"),
Melilla=c("ML"),
"Murcia, Región de"=c("MU"),
"Navarra, Comunidad Foral de" = c("NC") ,
"País Vasco" = c("VI","BI","SS"),
"Rioja, La" = c("LO")
)
str(dataMOMO_CCAAge)
## 'data.frame': 464208 obs. of 10 variables:
## $ fecha_defuncion : Factor w/ 3054 levels "2015-01-01","2015-01-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ yearday : Factor w/ 366 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ year : Factor w/ 9 levels "2015","2016",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ month : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ cod_gedad : Factor w/ 7 levels "0-14","15-44",..: NA 6 1 2 3 4 5 7 NA 6 ...
## $ nombre_ambito : Factor w/ 19 levels "Andalucía","Aragón",..: 1 1 1 1 1 1 1 1 2 2 ...
## $ defunciones_observadas : int 229 104 2 5 41 47 78 279 36 19 ...
## $ defunciones_estimadas_base : num 174.87 77.04 1.15 4.62 25.18 ...
## $ defunciones_estimadas_base_q99: num 206 98 4 10 38 43 84 242 49 30 ...
## $ defunciones_estimadas_base_q01: num 145 57 0 1 14 18 47 175 22 10 ...
=na.omit(dataMOMO_CCAAge)
dataMOMO_CCAAge
=aggregate(list(num_def=dataMOMO_CCAAge$defunciones_observadas),
dataMOMO_mesesCCAAgeby=list(year=dataMOMO_CCAAge$year,
month=dataMOMO_CCAAge$month,
cod_gedad=dataMOMO_CCAAge$cod_gedad,
nombre_ambito=dataMOMO_CCAAge$nombre_ambito),
FUN=sum)
The tables showing official COVID deaths by Autonomous Communities are given below:
for(j in 1:length(ccaas)){
if(length(provincias[[j]])>1){
=cbind(apply(ctable[,colnames(ctable)%in%provincias[[j]]],MARGIN=1,FUN=sum),
tablacumsum(apply(ctable[,colnames(ctable)%in%provincias[[j]]],MARGIN=1,FUN=sum)))
nrow(tabla),ncol(tabla)]="--"
tabla[=kable(tabla,
fcaption=paste("Deceased by age group in ",names(provincias)[j]),
col.names = c("Deaths","Cum. deaths"),
align="r")
else{
}=cbind(ctable[,colnames(ctable)%in%provincias[[j]]],
tablacumsum(ctable[,colnames(ctable)%in%provincias[[j]]]))
nrow(tabla),ncol(tabla)]="--"
tabla[=kable(tabla,
fcaption=paste("Deceased by age group in ",names(provincias)[j]),
col.names = c("Deaths","Cum. deaths"),
align="r")
}print(f)
}
##
##
## Table: Deceased by age group in Andalucía
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 5| 5|
## |10-19 | 8| 13|
## |20-29 | 55| 68|
## |30-39 | 102| 170|
## |40-49 | 232| 402|
## |50-59 | 811| 1213|
## |60-69 | 1825| 3038|
## |70-79 | 3314| 6352|
## |80+ | 7254| 13606|
## |NC | 4| 13610|
## |All | 13610| --|
##
##
## Table: Deceased by age group in Aragón
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 2| 2|
## |10-19 | 2| 4|
## |20-29 | 3| 7|
## |30-39 | 14| 21|
## |40-49 | 35| 56|
## |50-59 | 141| 197|
## |60-69 | 387| 584|
## |70-79 | 794| 1378|
## |80+ | 3251| 4629|
## |NC | 21| 4650|
## |All | 4650| --|
##
##
## Table: Deceased by age group in Asturias, Principado de
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 1| 1|
## |20-29 | 3| 4|
## |30-39 | 10| 14|
## |40-49 | 58| 72|
## |50-59 | 160| 232|
## |60-69 | 407| 639|
## |70-79 | 869| 1508|
## |80+ | 1956| 3464|
## |NC | 0| 3464|
## |All | 3464| --|
##
##
## Table: Deceased by age group in Balears, Illes
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 2| 2|
## |30-39 | 5| 7|
## |40-49 | 10| 17|
## |50-59 | 56| 73|
## |60-69 | 130| 203|
## |70-79 | 244| 447|
## |80+ | 741| 1188|
## |NC | 1| 1189|
## |All | 1189| --|
##
##
## Table: Deceased by age group in Canarias
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 2| 2|
## |10-19 | 3| 5|
## |20-29 | 7| 12|
## |30-39 | 13| 25|
## |40-49 | 36| 61|
## |50-59 | 126| 187|
## |60-69 | 279| 466|
## |70-79 | 581| 1047|
## |80+ | 1267| 2314|
## |NC | 0| 2314|
## |All | 2314| --|
##
##
## Table: Deceased by age group in Cantabria
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 0| 0|
## |30-39 | 1| 1|
## |40-49 | 6| 7|
## |50-59 | 23| 30|
## |60-69 | 53| 83|
## |70-79 | 136| 219|
## |80+ | 570| 789|
## |NC | 0| 789|
## |All | 789| --|
##
##
## Table: Deceased by age group in Castilla - La Mancha
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 2| 2|
## |10-19 | 1| 3|
## |20-29 | 10| 13|
## |30-39 | 18| 31|
## |40-49 | 70| 101|
## |50-59 | 240| 341|
## |60-69 | 702| 1043|
## |70-79 | 1472| 2515|
## |80+ | 4586| 7101|
## |NC | 0| 7101|
## |All | 7101| --|
##
##
## Table: Deceased by age group in Castilla y León
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 1| 1|
## |10-19 | 1| 2|
## |20-29 | 3| 5|
## |30-39 | 13| 18|
## |40-49 | 48| 66|
## |50-59 | 203| 269|
## |60-69 | 645| 914|
## |70-79 | 1404| 2318|
## |80+ | 6002| 8320|
## |NC | 1| 8321|
## |All | 8321| --|
##
##
## Table: Deceased by age group in Cataluña
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 3| 3|
## |10-19 | 5| 8|
## |20-29 | 18| 26|
## |30-39 | 42| 68|
## |40-49 | 179| 247|
## |50-59 | 624| 871|
## |60-69 | 1732| 2603|
## |70-79 | 3897| 6500|
## |80+ | 12300| 18800|
## |NC | 0| 18800|
## |All | 18800| --|
##
##
## Table: Deceased by age group in Ceuta
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 2| 2|
## |30-39 | 0| 2|
## |40-49 | 4| 6|
## |50-59 | 9| 15|
## |60-69 | 42| 57|
## |70-79 | 34| 91|
## |80+ | 55| 146|
## |NC | 0| 146|
## |All | 146| --|
##
##
## Table: Deceased by age group in Comunitat Valenciana
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 2| 2|
## |20-29 | 8| 10|
## |30-39 | 31| 41|
## |40-49 | 114| 155|
## |50-59 | 375| 530|
## |60-69 | 1019| 1549|
## |70-79 | 2133| 3682|
## |80+ | 5449| 9131|
## |NC | 0| 9131|
## |All | 9131| --|
##
##
## Table: Deceased by age group in Extremadura
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 1| 1|
## |10-19 | 1| 2|
## |20-29 | 9| 11|
## |30-39 | 19| 30|
## |40-49 | 36| 66|
## |50-59 | 182| 248|
## |60-69 | 404| 652|
## |70-79 | 708| 1360|
## |80+ | 1592| 2952|
## |NC | 1| 2953|
## |All | 2953| --|
##
##
## Table: Deceased by age group in Galicia
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 1| 1|
## |30-39 | 5| 6|
## |40-49 | 14| 20|
## |50-59 | 100| 120|
## |60-69 | 213| 333|
## |70-79 | 619| 952|
## |80+ | 2246| 3198|
## |NC | 0| 3198|
## |All | 3198| --|
##
##
## Table: Deceased by age group in Madrid, Comunidad de
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 5| 5|
## |10-19 | 5| 10|
## |20-29 | 21| 31|
## |30-39 | 49| 80|
## |40-49 | 243| 323|
## |50-59 | 760| 1083|
## |60-69 | 2045| 3128|
## |70-79 | 4527| 7655|
## |80+ | 11278| 18933|
## |NC | 0| 18933|
## |All | 18933| --|
##
##
## Table: Deceased by age group in Melilla
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 0| 0|
## |30-39 | 2| 2|
## |40-49 | 4| 6|
## |50-59 | 16| 22|
## |60-69 | 21| 43|
## |70-79 | 36| 79|
## |80+ | 75| 154|
## |NC | 0| 154|
## |All | 154| --|
##
##
## Table: Deceased by age group in Murcia, Región de
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 1| 1|
## |30-39 | 7| 8|
## |40-49 | 39| 47|
## |50-59 | 105| 152|
## |60-69 | 258| 410|
## |70-79 | 431| 841|
## |80+ | 1375| 2216|
## |NC | 0| 2216|
## |All | 2216| --|
##
##
## Table: Deceased by age group in Navarra, Comunidad Foral de
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 0| 0|
## |20-29 | 0| 0|
## |30-39 | 2| 2|
## |40-49 | 5| 7|
## |50-59 | 15| 22|
## |60-69 | 22| 44|
## |70-79 | 60| 104|
## |80+ | 142| 246|
## |NC | 184| 430|
## |All | 430| --|
##
##
## Table: Deceased by age group in País Vasco
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 2| 2|
## |10-19 | 0| 2|
## |20-29 | 6| 8|
## |30-39 | 23| 31|
## |40-49 | 57| 88|
## |50-59 | 179| 267|
## |60-69 | 507| 774|
## |70-79 | 1193| 1967|
## |80+ | 4049| 6016|
## |NC | 0| 6016|
## |All | 6016| --|
##
##
## Table: Deceased by age group in Rioja, La
##
## | | Deaths| Cum. deaths|
## |:-----|------:|-----------:|
## |0-9 | 0| 0|
## |10-19 | 1| 1|
## |20-29 | 0| 1|
## |30-39 | 4| 5|
## |40-49 | 15| 20|
## |50-59 | 24| 44|
## |60-69 | 61| 105|
## |70-79 | 180| 285|
## |80+ | 627| 912|
## |NC | 0| 912|
## |All | 912| --|
for(j in 1:length(ccaas)){
print(ccaas[j])
=ggplot(data=dataMOMO_CCAAge[dataMOMO_CCAAge$nombre_ambito==ccaas[j],])+
faes(x=yearday,y=defunciones_observadas,colour=year)+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_point(size=3)+
facet_grid(cod_gedad~.,scales="free_y")+
xlab("Month")+
ylab("Daily deaths")+
scale_x_discrete(
breaks=breaks,
labels = labels
+
)theme(axis.text.x = element_text(hjust=1))+
ggtitle(paste("Data from MoMo in ",levels(dataMOMO_CCAAge$nombre_ambito)[j]," (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
dir.create(gsub(" ",
"",paste("RGraphs/",ccaas[j])))
ggsave(gsub(" ",
"",
paste("RGraphs/",ccaas[j],"/MOMO_",substr(ccaas[j], 1, 3),"_byAge.png"),
fixed = TRUE),
width=30,
height=30,
units = "cm")
print(f)
=ggplot(data=dataMOMO_CCAAge[dataMOMO_CCAAge$nombre_ambito==ccaas[j],])+
faes(x=month,y=defunciones_observadas,colour=year)+
geom_ribbon(
aes(ymin=defunciones_estimadas_base_q01,
ymax=defunciones_estimadas_base_q99,
group=year),
fill="grey")+
geom_boxplot()+
facet_grid(cod_gedad~.,scales="free_y")+
xlab("Month")+
ylab("Daily deaths")+
scale_x_discrete(
labels = labels
+
)ggtitle(paste("Data from MoMo in ",levels(dataMOMO_CCAAge$nombre_ambito)[j]," (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/",ccaas[j],"/MOMO_Boxplot_",substr(ccaas[j], 1, 3),"_byAge.png"),
fixed = TRUE),
width=30,
height=30,
units = "cm")
print(f)
=ggplot(data=dataMOMO_mesesCCAAge[dataMOMO_mesesCCAAge$nombre_ambito==ccaas[j],])+
faes(x=month,fill=year,y=num_def)+
#geom_jitter(aes(shape=year))+
geom_bar(stat="identity",position="dodge")+
geom_text(aes(label=num_def),position = position_dodge(width = 1),angle=90,hjust=1,size=4 )+
facet_grid(cod_gedad~.,scales="free")+
xlab("Months")+
ylab("Monthly deaths")+
scale_x_discrete(
labels =labels
+
)ggtitle(paste("Data from MoMo in ",ccaas[j]," (",Sys.Date(),")"))+
theme(plot.title = element_text(hjust = 0.5),text = element_text(size = 20))
ggsave(gsub(" ",
"",
paste("RGraphs/",ccaas[j],"/MOMO_Barplot.png"),
fixed = TRUE),
width=30,
height=30,
units = "cm")
print(f)
}
## [1] "Andalucía"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Andalucía' already exists
## [1] "Aragón"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Aragón' already exists
## [1] "Asturias, Principado de"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Asturias,Principadode' already exists
## [1] "Balears, Illes"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Balears,Illes' already exists
## [1] "Canarias"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Canarias' already exists
## [1] "Cantabria"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Cantabria' already exists
## [1] "Castilla - La Mancha"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Castilla-LaMancha' already exists
## [1] "Castilla y León"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\CastillayLeón' already exists
## [1] "Cataluña"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Cataluña' already exists
## [1] "Ceuta"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Ceuta' already exists
## [1] "Comunitat Valenciana"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\ComunitatValenciana' already exists
## [1] "Extremadura"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Extremadura' already exists
## [1] "Galicia"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Galicia' already exists
## [1] "Madrid, Comunidad de"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Madrid,Comunidadde' already exists
## [1] "Melilla"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Melilla' already exists
## [1] "Murcia, Región de"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Murcia,Regiónde' already exists
## [1] "Navarra, Comunidad Foral de"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Navarra,ComunidadForalde' already exists
## [1] "País Vasco"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\PaísVasco' already exists
## [1] "Rioja, La"
## Warning in dir.create(gsub(" ", "", paste("RGraphs/", ccaas[j]))):
## 'RGraphs\Rioja,La' already exists