## ---- echo = FALSE------------------------------------------------------------
# Document-wide knitr chunk options: collapse source and output into one block,
# prefix output lines with "#>", and hide warnings and messages.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)

# Discrete fill scale based on the ColorBrewer "Set1" palette, with missing
# values drawn in "grey50". All arguments in `...` are forwarded unchanged to
# ggplot2::scale_fill_brewer().
#
# It is defined under the name ggplot2 uses for its default discrete fill
# scale, presumably so plots in this document pick it up automatically
# (NOTE(review): confirm this is the intent).
#
# "Set1" is a qualitative palette, so `type` is declared as "qual". When
# `palette` is given by name, `type` is not used to choose the palette, so the
# resulting colours are the same as before. The call is namespace-qualified
# because this function is defined before library(ggplot2) is attached.
scale_fill_discrete <- function(...) {
  ggplot2::scale_fill_brewer(
    ...,
    type = "qual",
    palette = "Set1",
    na.value = "grey50"
  )
}

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(fivethirtyeight)
# library(tidyverse)
library(ggplot2)
library(dplyr)
library(broom)
library(knitr)
library(patchwork)

## ----datascience, echo=FALSE, fig.cap = "Grolemund and Wickham's 'Data/Science Pipeline'", out.width = "75%", fig.align='center'----
knitr::include_graphics("images/data_science_pipeline.png")

## ---- eval=FALSE--------------------------------------------------------------
# library(fivethirtyeight)
# head(bechdel)
# ?bechdel
# # If using RStudio:
# View(bechdel)

## ---- eval=FALSE--------------------------------------------------------------
# vignette("fivethirtyeight", package="fivethirtyeight")

## ---- eval=FALSE--------------------------------------------------------------
# library(fivethirtyeight)
# ?hate_crimes
# head(hate_crimes)
# # If using RStudio:
# View(hate_crimes)

## -----------------------------------------------------------------------------
# Five-number summaries (plus mean) of the two variables plotted below.
summary(hate_crimes$gini_index)
summary(hate_crimes$hate_crimes_per_100k_splc)

## ----hate-crime, fig.height=9/2, fig.width=16/2.5, fig.align='center', fig.cap="Relationship between hate crime incidence and income inequality."----
# Base-graphics scatterplot of hate-crime rate against the Gini index; the
# title is added in a separate call after the plot is drawn.
plot(
  hate_crimes$gini_index,
  hate_crimes$hate_crimes_per_100k_splc,
  xlab = "Gini index",
  ylab = "Hate crime rate per 100K"
)
title("Hate Crimes per 100K Nov 9-18 2016 (SPLC)")

## ---- eval=FALSE--------------------------------------------------------------
# lm(hate_crimes_per_100k_splc ~ gini_index, data = hate_crimes)

## ---- echo=FALSE--------------------------------------------------------------
library(broom)
# Regress the hate-crime rate on the Gini index, tidy the coefficient table
# (including confidence intervals) and render it with three digits.
lm(hate_crimes_per_100k_splc ~ gini_index, data = hate_crimes) %>%
  tidy(conf.int = TRUE) %>%
  kable(digits = 3)

## ---- eval=FALSE--------------------------------------------------------------
# vignette("bechdel", package="fivethirtyeight")

## ---- eval=FALSE--------------------------------------------------------------
# vignette("user_contributed_vignettes", package="fivethirtyeight")

## ---- eval=FALSE--------------------------------------------------------------
# # If you haven't installed remotes package yet, do so via:
# # install.packages("remotes")
# remotes::install_github("rudeboybert/fivethirtyeight", build_vignettes = TRUE)

## ---- eval=FALSE--------------------------------------------------------------
# library(readr)
# flying_raw <- read_csv("https://bit.ly/2vg8gTf")
# colnames(flying_raw)[1:5]

## -----------------------------------------------------------------------------
# First five column names of the packaged `flying` data.
library(fivethirtyeight)
colnames(flying)[1:5]

## ---- eval=FALSE--------------------------------------------------------------
# # Using raw data:
# ggplot(flying_raw,
#        aes(x = `Do you have any children under 18?`,
#            fill = `In general, is itrude to bring a baby on a plane?`)) +
#   geom_bar(position = "fill") +
#   labs(x = "Children under 18?", y = "Proportion", fill = "Is it rude?")
#
# # Using fivethirtyeight package data:
# ggplot(flying, aes(x = children_under_18, fill = baby)) +
#   geom_bar(position = "fill") +
#   labs(x = "Children under 18?", y = "Proportion", fill = "Is it rude?")

## ----babies, fig.width=16/2.5, fig.height=9/2.5, fig.align='center', echo=FALSE, fig.cap="Attitudes about bringing babies on a flight."----
# Proportional (filled) bar chart of `baby` responses within each level of
# `children_under_18`.
ggplot(flying, aes(x = children_under_18, fill = baby)) +
  geom_bar(position = "fill") +
  labs(
    x = "Do you have children under 18?",
    y = "Proportion",
    fill = "Is it rude?"
  )

## ---- eval=FALSE--------------------------------------------------------------
# library(readr)
# US_births_1999_2003_raw <- read_csv("https://bit.ly/2vgRFiw")
# US_births_1999_raw <-
#   US_births_1999_2003_raw[US_births_1999_2003_raw$year == 1999, ]
# head(US_births_1999_raw)

## ----US-births, fig.width=16/2.5, fig.height=9/2, fig.align='center', fig.cap="Number of US births in 1999."----
library(fivethirtyeight)
# Keep only the rows whose `year` is 1999, preview them, then draw births
# against date as a line.
US_births_1999 <- US_births_1994_2003[US_births_1994_2003$year == 1999, ]
head(US_births_1999)
with(
  US_births_1999,
  plot(date, births, type = "l", xlab = "Date", ylab = "# of births")
)

## -----------------------------------------------------------------------------
# Row of the 1999 data with the largest `births` value.
head(US_births_1999[which.max(US_births_1999$births), ])

## ----bechdel-barplot-orig, echo=FALSE, fig.cap = "Original Bechdel barplot in FiveThirtyEight Article", out.width = "75%", fig.align='center'----
knitr::include_graphics("images/hickey-bechdel-11.png")

## ---- eval=FALSE--------------------------------------------------------------
# year_bins <- c("'70-'74", "'75-'79", "'80-'84", "'85-'89", "'90-'94",
#                "'95-'99", "'00-'04", "'05-'09", "'10-'13")
#
# # Using raw data:
# library(readr)
# bechdel_raw <- read_csv("https://bit.ly/2uD3ls6") %>%
#   mutate(five_year = cut(year, breaks = seq(1969, 2014, 5), labels = year_bins))
#
# ggplot(bechdel_raw, aes(x = five_year, fill = clean_test)) +
#   geom_bar(position = "fill", color = "black") +
#   labs(x = "Year", y = "Proportion", fill = "Bechdel Test") +
#   scale_fill_brewer(palette = "YlGnBu")
#
# # Using fivethirtyeight package data:
# library(fivethirtyeight)
# bechdel <- bechdel %>%
#   mutate(five_year = cut(year, breaks = seq(1969, 2014, 5), labels = year_bins))
#
# ggplot(bechdel, aes(x = five_year, fill = clean_test)) +
#   geom_bar(position = "fill", color = "black") +
#   labs(x = "Year", y = "Proportion", fill = "Bechdel Test") +
#   scale_fill_brewer(palette = "YlGnBu")

## ---- eval=FALSE, echo=FALSE--------------------------------------------------
# # This code is a repeat of the above code and is hidden from the user. It saves
# # a png output of the figure.
# # We do this b/c bit.ly short link redirects are
# # unstable in CRAN
# year_bins <- c("'70-'74", "'75-'79", "'80-'84", "'85-'89", "'90-'94",
#                "'95-'99", "'00-'04", "'05-'09", "'10-'13")
#
# # Raw data does not order the test results intuitively
# bechdel_raw <- read_csv("https://bit.ly/2uD3ls6") %>%
#   mutate(five_year = cut(year, breaks = seq(1969, 2014, 5), labels = year_bins))
#
# plot1 <- ggplot(bechdel_raw, aes(x = five_year, fill = clean_test)) +
#   geom_bar(position = "fill", color = "black") +
#   labs(x = "Year", y = "Proportion", fill = "Bechdel Test",
#        title = "Using raw data") +
#   scale_fill_brewer(palette = "YlGnBu")
#
# # Package data has intuitive hierarchical ordering of bechdel test
# bechdel <- bechdel %>%
#   mutate(five_year = cut(year, breaks = seq(1969, 2014, 5), labels = year_bins))
#
# plot2 <- ggplot(bechdel, aes(x = five_year, fill = clean_test)) +
#   geom_bar(position = "fill", color = "black") +
#   labs(x = "Year", y = "Proportion", fill = "Bechdel Test",
#        title = "Using fivethirtyeight package data") +
#   scale_fill_brewer(palette = "YlGnBu")
#
# bechdel_plot <- plot1 / plot2
#
# ggsave("vignettes/images/bechdel.png", plot = bechdel_plot, height = 6.5, width = 16/2.5)

## ----bechdel, out.width= "100%", echo=FALSE, fig.align='center', fig.cap="Barcharts of Bechdel Test results across time."----
knitr::include_graphics("images/bechdel.png")

## ---- fig.width=16/2.5, fig.height=9/2----------------------------------------
library(dplyr)
library(fivethirtyeight)
# Wide-format rows of `drinks` for the two countries being compared.
drinks %>%
  filter(country %in% c("USA", "France"))

## ---- fig.width=16/2.5, fig.height=9/2---------------------------------------
library(tidyr)
# Reshape to long format: every column other than `country` and
# `total_litres_of_pure_alcohol` becomes a (type, servings) pair.
# pivot_longer() replaces the superseded gather(). It emits rows country by
# country rather than type by type, which leaves the plot below unchanged.
drinks_tidy_US_FR <- drinks %>%
  filter(country %in% c("USA", "France")) %>%
  pivot_longer(
    cols = -c(country, total_litres_of_pure_alcohol),
    names_to = "type",
    values_to = "servings"
  )
drinks_tidy_US_FR

## ----drinks, fig.width=16/2.5, fig.height=9/2.5, fig.align='center', fig.cap="USA vs France alcohol consumption."----
# Side-by-side (dodged) bars of servings per alcohol type, one bar per country.
ggplot(drinks_tidy_US_FR, aes(x = type, y = servings, fill = country)) +
  geom_col(position = "dodge") +
  labs(x = "Alcohol type", y = "Average number of servings")