Bechdel Test

Last updated on Mar 9, 2021 4 min read tidytuesday

Load Packages
Read in data
Top Plot
- Annotate it
Bottom Plot
Plots Assemble!

Load Packages

library(tidyverse)
library(patchwork)
library(showtext)
font_add_google("Righteous", "Righteous")
showtext_auto()

Read in data

There’s a miscalculation where the obscure Frozen (2010) has the same reported box office as the Disney musical Frozen (2013). I’m too lazy to look up what its real box office was, so I just removed it.

movies_detail <- readr::read_csv('https://github.com/rfordatascience/tidytuesday/blob/master/data/2021/2021-03-09/movies.csv?raw=true', na = c("NA", "#N/A")) %>%
  mutate(binary = binary == "PASS") %>%
  filter(!(year == 2010 & title == "Frozen"))

movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/raw_bechdel.csv')

Top Plot

top_pct_lustrum <- movies_detail %>%
  drop_na(domgross_2013) %>%
  mutate(lustrum = cut_width(year, 5, boundary = 1975)) %>%
  group_by(lustrum) %>%
  arrange(desc(domgross_2013)) %>%
  mutate(rank = rank(domgross_2013))  %>%
  mutate(rank = rank/max(rank)) %>%
  mutate(pct_gross = domgross_2013/sum(domgross_2013)) %>%
  filter(rank >= 0.9) %>%
  summarize(
    pct_lustrum = mean(binary, na.rm =T), 
    pct_lustrum_gross = sum(pct_gross)
    ) %>%
  ungroup() 

top_pct_year <- movies_detail %>%
  drop_na(domgross_2013) %>%
  mutate(lustrum = cut_width(year, 5, boundary = 1975)) %>%
  group_by(year, lustrum) %>%
  mutate(rank = rank(domgross_2013)) %>%
  mutate(rank = rank/max(rank)) %>%
  mutate(pct_gross = domgross_2013/sum(domgross_2013)) %>%
  filter(rank >= 0.75) %>%
  summarize(
    pct_year = mean(binary, na.rm =T), 
    pct_year_gross = sum(pct_gross)
    ) %>%
  ungroup() 

p1 <- top_pct_lustrum %>%
  inner_join(top_pct_year) %>%
  ggplot(aes(x = year)) + 
  geom_col(
    aes(y = pct_lustrum_gross), 
    fill = 'grey90', width = 1, alpha = 1/2
  ) + 
  geom_point(aes(y = pct_year), 
    shape = 3, 
    size = 3, 
    color = 'grey70') + 
  geom_line(aes(y = pct_lustrum), color = 'grey20') + 
  theme_minimal(base_size = 14, base_family = "sans") + 
  scale_x_continuous(expand = expansion(add = c(1,5))) + 
  scale_y_continuous(labels = scales::label_percent()) + 
  labs(
    y = NULL,
    x = 'top grossing movies of half-decade (bold pass test)') + 
  theme(
    axis.title = 
      element_text(
        family = "Righteous", 
        face = 'bold', 
        hjust = 0, 
        size = 16, 
        color = 'orangered2',
        ), 
    panel.grid.major.y = element_blank(), 
    panel.grid.minor.y = element_blank()
  )

p1

Annotate it

p1 <- p1 + 
  annotate(
    geom = 'curve',
    xend = 1984.75, yend = .01, x = 1982, y = 0.5,
    curvature = 0.15,ncp = 9, lwd = 1/2,
    arrow = arrow(length = unit(1/2, 'lines')), 
    color = 'orangered1'
    )  + 
  annotate(
    geom = 'text', 
    family = "Righteous",
    x = 1981, 
    y = 0.51, 
    size= 4.25,
    label = "The year Alison Bechdel proposed her now famous test was in the midst of a decade-long drought for female representation in popular movies" %>%
      str_wrap(30), hjust = 0, vjust = 0, color = 'orangered1'
  ) + 
  geom_vline(
    xintercept = 1980,
    color = 'grey80', 
    alpha = 1/2,
    lty = 2, 
    size = 1
  ) + 
  annotate(
    geom = 'text',
    x = 1979.85, 
    y = 0.80, 
    label = 'data before this date is sparse and less accurate', 
    angle = 90, 
    hjust = 1/2,
    vjust = 0,
    size = 2.75,
    color = 'grey80'
  ) + 
  annotate(
    geom = 'text',
    x = 1970.05, 
    y = .358,
    hjust = 0, 
    vjust = 1, 
    color = 'grey30',
    size = 2.75, 
    label = 'top 10% grossing movies in \nhalf-decade which pass test'
  ) + 
  annotate(
    geom = 'curve',
    xend = 1970.2, yend = .98, x = 1971, y = 0.9,
    curvature = 0.15, ncp = 9, lwd = 1/2,
    arrow = arrow(length = unit(1/3, 'lines')), 
    color = 'grey70'
    )  + 
  annotate(
    geom = 'text',
    x = 1971, 
    y = .89,
    hjust = 0, 
    vjust = 1, 
    color = 'grey70',
    size = 2.75, 
    label = 'top 50% grossing movies in\nyear which pass test'
  ) + 
  annotate(
    geom = 'text', 
    color = 'grey75', 
    x = 1975.75, 
    y = 0.46, 
    hjust = 0, 
    vjust = 1,
    size = 2.5, 
    label = 'percent USA box office for\ntop 10% grossing movies'
  )
  

p1

Bottom Plot

p2 <- movies_detail %>%
  drop_na(domgross_2013) %>%
  mutate(lustrum = cut_width(year, 5, boundary = 1975)) %>%
  group_by(lustrum) %>%
  mutate(rank = rank(domgross_2013)) %>%
  mutate(rank = rank/max(rank)) %>%
  arrange(desc(rank)) %>%
  slice(1:5) %>%
  select(lustrum, title, binary) %>%
  mutate(
    title = title %>%
      str_replace("&#39;", "'") %>%
      str_remove_all("Star Wars: Episode .*- ") %>%
      str_remove_all("The Lord of the Rings: ") 
  ) %>%
  mutate(
    rank = rank(str_length(title), ties.method = 'first'), 
    title = str_wrap(title, width = 25)
    ) %>%
  ggplot(aes(x = lustrum, y = rank)) + 
  geom_text(aes(label = title, color = binary), 
    hjust = 0, vjust = 1, size = 3, lineheight = 3/4) + 
  theme_void(base_family = "sans") + 
  scale_x_discrete(expand = expansion(add = c(1/5,8/5))) + 
  scale_y_discrete(expand = expansion(add = c(3/5,1/5))) + 
  scale_color_manual(
    values = c('TRUE' = 'grey20', 'FALSE' = 'grey65'), 
    guide = guide_none()) + 
  labs(
    caption = "A #TidyTuesday adventure by @tannerkoomar\nData from @FiveThirtyEight"
  )

p2

Plots Assemble!

final_plot <- wrap_plots(
  list(p1, p2), ncol = 1, heights = c(4, 1)
  )

final_plot

R data visualization