Project title

Appendix to report

Data cleaning

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.0
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.2.1     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(skimr)

library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom        1.0.2     ✔ rsample      1.1.1
✔ dials        1.1.0     ✔ tune         1.1.1
✔ infer        1.0.4     ✔ workflows    1.1.2
✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
✔ parsnip      1.0.3     ✔ yardstick    1.1.0
✔ recipes      1.0.6     
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Use tidymodels_prefer() to resolve common conflicts.
# Load the raw data, treating blank strings and "0" entries as missing (NA).
billionaires_final <- read.csv(
  "data/billionaires.csv",
  na.strings = c("", "0")
)

# Shorten the "Private equity/leveraged buyout" industry label to
# "Private equity"; every other value (including NA) is left as it was.
billionaires_final <- billionaires_final |>
  mutate(
    wealth.how.industry = if_else(
      wealth.how.industry == "Private equity/leveraged buyout",
      "Private equity",
      wealth.how.industry
    )
  )
# Keep only the 2014 and 1996 observations and drop rows whose
# wealth.how.industry value is missing.
#
# Then recode wealth.how.industry to a two-level factor: "yes" when the
# industry is "Technology-Computer", "no" otherwise. `year` becomes a factor
# whose levels are ordered "2014" first, then "1996".
billionaires_final <- billionaires_final |>
  filter(
    year %in% c(2014, 1996),
    !is.na(wealth.how.industry)
  ) |>
  mutate(
    wealth.how.industry = as.factor(
      ifelse(wealth.how.industry == "Technology-Computer", "yes", "no")
    ),
    year = fct_relevel(.f = as.factor(year), "2014", "1996")
  )
# Add an `inherited` column flagging whether the wealth was inherited:
# "not inherited" when wealth.how.inherited contains that phrase, "inherited"
# for any other non-missing value (a missing value stays NA).
#
# `year` was already restricted to 2014/1996 and releveled in the preceding
# step of this script, so the duplicate filter/as.factor/fct_relevel calls
# that used to follow here were no-ops and have been dropped.
billionaires_final <- billionaires_final |>
  mutate(
    inherited = if_else(
      str_detect(wealth.how.inherited, "not inherited"),
      "not inherited",
      "inherited"
    )
  )
# Save the cleaned data set.
# NOTE(review): the output path has no ".csv" extension and write.csv() keeps
# row names by default, which adds an index column to the file — confirm both
# are intended before changing, since downstream readers may rely on them.
write.csv(x = billionaires_final, file = "data/billionaires_final")
  1. We replaced blank and 0 values with NA
  2. We renamed the “Private equity/leveraged buyout” value for the wealth.how.industry variable to “Private equity”
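  3. We kept only the 1996 and 2014 observations, filtered out rows that had an NA value for the wealth.how.industry variable, and recoded wealth.how.industry to “yes” for the “Technology-Computer” industry and “no” otherwise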
  4. We made a new variable inherited that describes if the billionaire inherited their wealth