R is very powerful software for data visualization. In this post, I present a simple example of how data can be visualized as a bar chart race in R. I used COVID-19 case data by country to show the evolution of cases in the 10 most affected countries over the last 350 days.
# The global environment is intentionally NOT wiped with rm(list = ls()):
# every object this script uses is created below, and clearing the workspace
# would destroy unrelated objects of whoever runs or sources the script.
# Packages used
library(tidyverse) # ggplot2, dplyr and the %>% pipe
library(readxl)    # read_excel()
library(dplyr)     # already attached by tidyverse; kept for explicitness
library(gganimate) # transition_time(), animate(), anim_save()
# Folder that holds the input workbook on the author's machine. The working
# directory is only changed when that folder exists, so on any other machine
# the script keeps the current working directory instead of stopping here.
data_dir <- "C:/Users/siddhabhatta/Desktop/October31"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# data source : https://www.ecdc.europa.eu/en/covid-19/data
# importing data
The data and R script can be downloaded from the link below:
https://drive.google.com/drive/folders/1HkPFE7v4fIx2rOOJnhE1E52rCQlGkigq?usp=sharing
# Load the case data from the Excel workbook in the working directory
data <- read_excel("coviddec14.xlsx")
# Preview the first rows
head(data)
# Add a Date-class column built from dateRep
# NOTE(review): the format string assumes month/day/two-digit-year text;
# confirm against the actual contents of the dateRep column.
data$date <- as.Date(data$dateRep, format = "%m/%d/%y")
head(data$date)
# Shorten the long country labels so they fit next to the bars
data$country <- replace(
  data$country,
  data$country == "United_States_of_America",
  "USA"
)
data$country <- replace(
  data$country,
  data$country == "United_Kingdom",
  "UK"
)
data$country <- replace(
  data$country,
  data$country == "Cases_on_an_international_conveyance_Japan",
  "Intl_CV_Center_Japan"
)
# Group the data by country and date, total the cases per day, and build the
# running (cumulative) total of cases for every country.
datanew <- data %>% # %>% can be read as "then"
  select(country, cases, date, continent) %>%
  group_by(continent, country, date) %>%
  # na.rm = TRUE: a missing daily count is skipped instead of turning every
  # later cumulative total of that country into NA.
  # .groups = "drop_last": keep the (continent, country) grouping explicitly,
  # because the cumsum() below must run within each country.
  summarise(total = sum(cases, na.rm = TRUE), .groups = "drop_last") %>%
  mutate(cumtotal = cumsum(total))
# Rank the countries within each date by cumulative cases (1 = most cases)
# and keep only the ten highest-ranked countries per date.
data2 <- datanew %>%
  group_by(date) %>%
  arrange(date, desc(cumtotal)) %>%
  # row_number() numbers the rows of each date group in their sorted order
  mutate(rank = row_number()) %>%
  filter(rank <= 10) %>%
  # return an ungrouped table so later steps are not silently done per date
  ungroup()
# Build the static plot, faceted by date, and store it in my_plot.
# Each bar is a rectangle running from 0 to the cumulative total, centred on
# the country's rank and coloured by continent.
my_plot <- ggplot(
  data2,
  aes(
    xmin = 0,
    xmax = cumtotal,
    ymin = rank - 0.45,
    ymax = rank + 0.45,
    y = rank,
    fill = continent
  )
) +
  facet_wrap(~date) +
  geom_rect(alpha = .7) +
  scale_fill_viridis_d(option = "magma", direction = -1) +
  scale_x_continuous(
    limits = c(-5000000, 16000000),
    breaks = c(-5000000, 0, 4000000, 8000000, 12000000, 16000000)
  ) +
  # country name, right-aligned just left of the bar origin
  geom_text(
    mapping = aes(label = country),
    x = -100,
    hjust = "right",
    col = "darkblue"
  ) +
  # cumulative total, right-aligned at a fixed x position
  geom_text(
    mapping = aes(label = paste(cumtotal), x = 12000000),
    hjust = "right",
    col = "darkblue"
  ) +
  # reversed y axis so that rank 1 is drawn at the top
  scale_y_reverse() +
  labs(fill = NULL) +
  ggtitle("Evolution of Covid-19 Cases") +
  labs(x = "Covid Cases", y = "Top 10 Countries") +
  theme_classic()
# Turn the faceted plot into an animation stored in p: remove the facets,
# add a large label showing the current date, track each bar by country,
# and let the frames advance through the dates.
p <- my_plot +
  facet_null() +
  geom_text(
    mapping = aes(label = as.character(date)),
    x = 8000000,
    y = -10,
    size = 12,
    col = "green",
    family = "Times"
  ) +
  aes(group = country) +
  transition_time(date)
# Render p with 350 frames in total at 5 frames per second
animate(p, nframes = 350, fps = 5, width = 1000)
Saving the result in GIF format:
# Render the animation as a GIF object and write it to gganim.gif.
# nframes is given explicitly so the saved file uses the same 350 frames as
# the preview render instead of falling back to animate()'s default count.
# The renderer is left to write to its own temporary file; anim_save() is the
# single place where gganim.gif is written, so the file is not produced twice.
gif <- animate(
  p,
  nframes = 350,
  fps = 5,
  width = 1000,
  height = 700,
  renderer = gifski_renderer(),
  start_pause = 15,
  end_pause = 15
)
anim_save("gganim.gif", animation = gif)
Here is the output.
And here is the video explanation.
No comments:
Post a Comment