Project title

Appendix to report

Data cleaning

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.0
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.2.1     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(skimr)
# Load the county-level mask-use survey data. Per the column specification
# printed below, COUNTYFP is read as character and the five response
# columns (NEVER ... ALWAYS) as doubles.
mask_use_by_county <- read_csv(file = "data/mask-use-by-county.csv")
Rows: 3142 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): COUNTYFP
dbl (5): NEVER, RARELY, SOMETIMES, FREQUENTLY, ALWAYS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table: two-character state FIPS prefix -> two-letter state
# abbreviation (50 states plus DC). Keys are strings so that leading zeros
# are preserved.
fips <- list(
  "01" = "AL", "02" = "AK", "04" = "AZ", "05" = "AR", "06" = "CA",
  "08" = "CO", "09" = "CT", "10" = "DE", "11" = "DC", "12" = "FL",
  "13" = "GA", "15" = "HI", "16" = "ID", "17" = "IL", "18" = "IN",
  "19" = "IA", "20" = "KS", "21" = "KY", "22" = "LA", "23" = "ME",
  "24" = "MD", "25" = "MA", "26" = "MI", "27" = "MN", "28" = "MS",
  "29" = "MO", "30" = "MT", "31" = "NE", "32" = "NV", "33" = "NH",
  "34" = "NJ", "35" = "NM", "36" = "NY", "37" = "NC", "38" = "ND",
  "39" = "OH", "40" = "OK", "41" = "OR", "42" = "PA", "44" = "RI",
  "45" = "SC", "46" = "SD", "47" = "TN", "48" = "TX", "49" = "UT",
  "50" = "VT", "51" = "VA", "53" = "WA", "54" = "WV", "55" = "WI",
  "56" = "WY"
)
# Lookup table: two-letter state abbreviation -> "red" or "blue" label
# (50 states plus DC).
# NOTE(review): presumably encodes the winner of a presidential election;
# confirm which election year these labels come from.
red_and_blue <- list(
  "AL" = "red",  "AK" = "red",  "AZ" = "blue", "AR" = "red",
  "CA" = "blue", "CO" = "blue", "CT" = "blue", "DE" = "blue",
  "FL" = "red",  "GA" = "blue", "HI" = "blue", "ID" = "red",
  "IL" = "blue", "IN" = "red",  "IA" = "red",  "KS" = "red",
  "KY" = "red",  "LA" = "red",  "ME" = "blue", "MD" = "blue",
  "MA" = "blue", "MI" = "blue", "MN" = "blue", "MS" = "red",
  "MO" = "red",  "MT" = "red",  "NE" = "red",  "NV" = "blue",
  "NH" = "blue", "NJ" = "blue", "NM" = "blue", "NY" = "blue",
  "NC" = "red",  "ND" = "red",  "OH" = "red",  "OK" = "red",
  "OR" = "blue", "PA" = "blue", "RI" = "blue", "SC" = "red",
  "SD" = "red",  "TN" = "red",  "TX" = "red",  "UT" = "red",
  "VT" = "blue", "VA" = "blue", "WA" = "blue", "WV" = "red",
  "WI" = "blue", "WY" = "red",  "DC" = "blue"
)
# Add `output_column` to `df` by looking up the first two characters of
# `df[[column_name]]` in `my_map`.
#
# Arguments:
#   df          A data frame / tibble.
#   column_name Name (string) of the column whose two-character prefix is
#               used as the lookup key.
#   my_map      Named list (or named vector) with exactly one value per key.
#
# Returns `df` with a new column `output_column`, aligned row-by-row with the
# input. Prefixes that are missing from `my_map` (or NA) map to NA.
#
# The lookup is vectorised with match() so the result is a plain atomic
# vector; the earlier sapply()/`[` approach produced a named list-column
# with NULL entries for unmapped keys.
map_column <- function(df, column_name, my_map) {
  stopifnot(all(lengths(my_map) == 1L))

  prefixes <- substr(df[[column_name]], 1, 2)

  mapped_values <- unlist(my_map, use.names = FALSE)
  if (is.null(mapped_values)) {
    # Empty map: every lookup misses, so every row becomes NA.
    mapped_values <- NA_character_
  }

  df$output_column <- mapped_values[match(prefixes, names(my_map))]
  df
}
# Look up each county's state abbreviation from the first two characters of
# COUNTYFP; map_column() stores the result in `output_column`.
new_df <- map_column(
  df = mask_use_by_county,
  column_name = "COUNTYFP",
  my_map = fips
)
# Add `new_col` to `df` by looking up each value of `df[[col_name]]` in
# `my_list`.
#
# Arguments:
#   df       A data frame / tibble.
#   col_name Name (string) of the key column. May be an atomic vector or a
#            list-column holding one key per row.
#   my_list  Named list (or named vector) with exactly one value per key.
#
# Returns `df` with a new column `new_col`, aligned row-by-row with the
# input. Keys that are missing from `my_list` (or NA / empty) map to NA.
#
# The previous unlist(lapply(...)) approach dropped NULL results for
# unmatched keys, which shortened the vector and either raised a length
# error or silently misaligned rows through recycling.
map_to_list <- function(df, col_name, my_list) {
  stopifnot(all(lengths(my_list) == 1L))

  # Normalise the key column to a character vector, one key per row.
  keys <- vapply(
    df[[col_name]],
    function(key) {
      if (length(key) == 0L) NA_character_ else as.character(key)[[1L]]
    },
    character(1),
    USE.NAMES = FALSE
  )

  values <- unlist(my_list, use.names = FALSE)
  if (is.null(values)) {
    # Empty lookup table: every row becomes NA.
    values <- NA
  }

  df$new_col <- values[match(keys, names(my_list))]
  df
}

# Attach the red/blue label of each county's state; map_to_list() stores it
# in `new_col`.
newest_df <- map_to_list(new_df, "output_column", red_and_blue)

# Derive the mask-wearing scores from the survey response columns, then give
# every column a descriptive snake_case name. Columns are renamed by name
# (not by position) so a change in column order cannot silently mislabel
# them, and no variable called `rename` is created that would shadow
# dplyr::rename().
election_data_pre <- newest_df |>
  mutate(
    # Net score: (frequently + always) minus (never + rarely).
    holistic_score = (FREQUENTLY + ALWAYS) - (NEVER + RARELY),
    not_wear_score = NEVER + RARELY,
    wear_score = FREQUENTLY + ALWAYS
  ) |>
  dplyr::rename(
    county_fips_code = COUNTYFP,
    never = NEVER,
    rarely = RARELY,
    sometimes = SOMETIMES,
    frequently = FREQUENTLY,
    always = ALWAYS,
    state_abbreviation = output_column,
    state_color = new_col
  )

# Final analysis table: identifiers first, then scores, then raw responses.
election_data <- election_data_pre |>
  select(
    county_fips_code, state_color, state_abbreviation,
    holistic_score, wear_score, not_wear_score,
    never, rarely, sometimes, frequently, always
  )

Other appendices (as necessary)

library(tidyr)
library(ggplot2)
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom        1.0.2     ✔ rsample      1.1.1
✔ dials        1.1.0     ✔ tune         1.1.1
✔ infer        1.0.4     ✔ workflows    1.1.2
✔ modeldata    1.0.1     ✔ workflowsets 1.0.0
✔ parsnip      1.0.3     ✔ yardstick    1.1.0
✔ recipes      1.0.6     
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Dig deeper into tidy modeling with R at https://www.tmwr.org
library(openintro)
Loading required package: airports
Loading required package: cherryblossom
Loading required package: usdata

Attaching package: 'openintro'
The following object is masked from 'package:modeldata':

    ames
source("df.R")
Rows: 3142 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): COUNTYFP
dbl (5): NEVER, RARELY, SOMETIMES, FREQUENTLY, ALWAYS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean, standard deviation and median of the holistic score per state color.
election_summary <- election_data |>
  group_by(state_color) |>
  summarize(
    score_mean = mean(holistic_score),
    sd_score = sd(holistic_score),
    median_score = median(holistic_score)
  )

# Histograms of the holistic score for red and blue states, drawn on the
# same axis limits so the two plots can be compared.
# NOTE(review): xlim()/ylim() set scale limits, which discard observations
# and bars that fall outside them (ggplot2 warns when this happens) --
# confirm no counties score below -0.2 and no bin exceeds 200.
red_histogram <- election_data |>
  filter(state_color == "red") |>
  ggplot(aes(x = holistic_score)) +
  geom_histogram() +
  xlim(-0.2, 1) +
  ylim(0, 200)

blue_histogram <- election_data |>
  filter(state_color == "blue") |>
  ggplot(aes(x = holistic_score)) +
  geom_histogram() +
  xlim(-0.2, 1) +
  ylim(0, 200)

# Copy of the data with a 0/1 indicator (1 = blue state) for the regression.
# The column is referenced directly rather than via
# `election_data_clone$state_color`, which bypasses dplyr's data masking.
election_data_clone <- election_data |>
  mutate(binary_state = ifelse(state_color == "blue", 1, 0))

# Simple linear regression of the holistic score on the blue-state indicator.
lin_fit <- linear_reg() |>
  fit(holistic_score ~ binary_state, data = election_data_clone)

# NOTE(review): nothing in this visible section draws random numbers; the
# seed is kept so any later code that does stays reproducible.
set.seed(123)

# Two-sided, independent two-sample t-test of holistic score by state color.
# `paired` is deliberately not passed: FALSE is already the default, and
# newer R releases raise an error when `paired` is supplied to the formula
# method of t.test().
t_dist <- t.test(
  holistic_score ~ state_color,
  data = election_data,
  alternative = "two.sided",
  mu = 0,
  conf.level = 0.95
)