US Mental Health

Appendix to report

Data cleaning

library(tidyverse)
library(skimr)
library(janitor)
library(tidymodels)
age_mh1 <- read_csv("data/Final_Data/AGE x MH1.csv")
Rows: 240 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Age (recoded), Mental health diagnosis one, Total % SE, Total % CI...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Cleaning data for Age and Mental Health diagnosis
age_mh1 <- age_mh1|>
  clean_names()|>
  select(age_recoded,mental_health_diagnosis_one,unweighted_count)|>
  filter(age_recoded!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(age_recoded!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(age_mh1))|>
  rename(
    age_count=unweighted_count
  )
write_csv(age_mh1,file = "data/Final_Data/Clean_Data/AGE_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected age_recoded,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all age_recorded = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all age_recoded =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to age_count to reflect that dataset deals with ages of persons who took the survey

educ_mh1 <- read_csv("data/Final_Data/EDUC x MH1.csv")
Rows: 105 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Education, Mental health diagnosis one, Total % SE, Total % CI (lo...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
educ_mh1 <- educ_mh1 |>
  clean_names()|>
  select(education,mental_health_diagnosis_one,unweighted_count)|>
   select(education,mental_health_diagnosis_one,unweighted_count)|>
  filter(education!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(education!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(educ_mh1))|>
  rename(
    edu_count=unweighted_count
  )

write_csv(educ_mh1,file = "data/Final_Data/Clean_Data/EDUC_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected education,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all education = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all education =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to educ_count to reflect that dataset deals with education levels of persons who took the survey

ethn_mh1 <- read_csv("data/Final_Data/ETHNIC x MH1.csv")
Rows: 90 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Hispanic or Latino origin (ethnicity), Mental health diagnosis one...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ethn_mh1 <- ethn_mh1|>
  clean_names()|>
  rename(
    ethnicity=hispanic_or_latino_origin_ethnicity
  )|>
   select(ethnicity,mental_health_diagnosis_one,unweighted_count)|>
   select(ethnicity,mental_health_diagnosis_one,unweighted_count)|>
  filter(ethnicity!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(ethnicity!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(ethn_mh1))|>
  rename(
    ethn_count=unweighted_count
  )

write_csv(ethn_mh1,file = "data/Final_Data/Clean_Data/ETHN_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

2.Renamed hispanic_or_latino_origin_ethnicity to ethnicity

  1. selected ethnicity,mental_health_diagnosis_one,unweighted_count from columns

  2. Filtered out all ethnicity = to Overall

  3. Filtered out all mental_health_diagnosis_one =Overall

  4. Filtered out all ethnicity =Missing/unknown/not collected/invalid

  5. Removed first row to get full set of data

  6. Changed unweight_count to ethn_count to reflect that dataset deals with ethnicity of persons who took the survey

gen_mh1 <- read_csv("data/Final_Data/GENDER x MH1.csv")
Rows: 60 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Sex, Mental health diagnosis one, Total % SE, Total % CI (lower), ...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gen_mh1 <- gen_mh1 |>
  clean_names()|>
  select(sex,mental_health_diagnosis_one,unweighted_count)|>
   select(sex,mental_health_diagnosis_one,unweighted_count)|>
  filter(sex!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(sex!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(gen_mh1))|>
  rename(
    sex_count=unweighted_count
  )

write_csv(gen_mh1,file = "data/Final_Data/Clean_Data/GENDER_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected sex,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all sex = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all sex =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to sex_count to reflect that dataset deals with the gender of persons who took the survey

race_mh1 <- read_csv("data/Final_Data/RACE x MH1.csv")
Rows: 120 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Race, Mental health diagnosis one, Total % SE, Total % CI (lower),...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
race_mh1 <- race_mh1 |>
  clean_names()|>
  select(race,mental_health_diagnosis_one,unweighted_count)|>
  filter(race!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(race!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(race_mh1))|>
  rename(
    race_count=unweighted_count
  )

write_csv(race_mh1,file = "data/Final_Data/Clean_Data/RACE_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected race,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all race = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all race =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to race_count to reflect that dataset deals with the different races of persons who took the survey

stat_mh1 <- read_csv("data/Final_Data/STATEFIP x MH1.csv")
Rows: 750 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Reporting state code, Mental health diagnosis one, Total % SE, Tot...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
stat_mh1 <- stat_mh1 |>
  clean_names()|>
  select(reporting_state_code,mental_health_diagnosis_one,unweighted_count)|>
  filter(reporting_state_code!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(reporting_state_code!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(stat_mh1))|>
  rename(
    stat_count=unweighted_count
  )

write_csv(stat_mh1,file = "data/Final_Data/Clean_Data/STATE_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected reporting_state_code,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all reporting_state_code = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all reporting_state_code =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to stat_count to reflect that dataset deals with different states the persons who took the survey came from

sub_mh1 <- read_csv("data/Final_Data/SUB x MH1.csv")
Rows: 225 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Substance use diagnosis, Mental health diagnosis one, Total % SE, ...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sub_mh1 <- sub_mh1 |>
  clean_names()|>
  select(substance_use_diagnosis,mental_health_diagnosis_one,unweighted_count)|>
  filter(substance_use_diagnosis!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(substance_use_diagnosis!="Missing/unknown/not collected/invalid/no or deferred diagnosis")|>
  slice(2:nrow(sub_mh1))|>
  rename(
    sub_count=unweighted_count
  )

write_csv(sub_mh1,file = "data/Final_Data/Clean_Data/SUB_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected substance_use_diagnosis,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all substance_use_diagnosis = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all substance_use_diagnosis =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to sub_count to reflect that dataset deals with different subtances the persons who took the survey may have abused

vet_mh1 <- read_csv("data/Final_Data/VETERAN x MH1.csv")
Rows: 60 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Veteran status, Mental health diagnosis one, Total % SE, Total % C...
dbl  (4): Total %, Row %, Column %, Unweighted Count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vet_mh1 <- vet_mh1 |>
  clean_names()|>
  select(veteran_status,mental_health_diagnosis_one,unweighted_count)|>
  filter(veteran_status!="Overall")|>
  filter(mental_health_diagnosis_one!="Overall")|>
  filter(veteran_status!="Missing/unknown/not collected/invalid")|>
  slice(2:nrow(vet_mh1))|>
  rename(
    vet_count=unweighted_count
  )

write_csv(vet_mh1,file = "data/Final_Data/Clean_Data/VET_MH1_CLEAN.csv")
  1. The first thing that is done to the data after it has been read, is the name of the columns are converted to follow the appropriate syntax

  2. selected veteran_status,mental_health_diagnosis_one,unweighted_count from columns

  3. Filtered out all veteran_status = to Overall

  4. Filtered out all mental_health_diagnosis_one =Overall

  5. Filtered out all veteran_status =Missing/unknown/not collected/invalid

  6. Removed first row to get full set of data

  7. Changed unweight_count to vet_count to reflect that dataset deals with different the people are veterans that took the survey

Other appendicies (as necessary)

# convert substance frequency table to data frame
sub_freq <- read_csv("data/Final_Data/Clean_Data/SUB_MH1_CLEAN.csv")
Rows: 181 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): substance_use_diagnosis, mental_health_diagnosis_one
dbl (1): sub_count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sub_df <- sub_freq[rep(1:nrow(sub_freq), sub_freq$sub_count), -3]
write_csv(sub_df, file = "data/Final_Data/Clean_Data/SUB_MH1_DF.csv")

# convert education frequency table to data frame
ed_freq <- read_csv("data/Final_Data/Clean_Data/EDUC_MH1_CLEAN.csv")
Rows: 69 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): education, mental_health_diagnosis_one
dbl (1): edu_count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ed_df <- ed_freq[rep(1:nrow(ed_freq), ed_freq$edu_count), -3]
write_csv(ed_df, file = "data/Final_Data/Clean_Data/EDUC_MH1_DF.csv")

# convert race frequency table to data frame
race_freq <- read_csv("data/Final_Data/Clean_Data/RACE_MH1_CLEAN.csv")
Rows: 83 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): race, mental_health_diagnosis_one
dbl (1): race_count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
race_df <- race_freq[rep(1:nrow(race_freq), race_freq$race_count), -3]
write_csv(race_df, file = "data/Final_Data/Clean_Data/RACE_MH1_DF.csv")

# convert substane frequency table to data frame
vet_freq <- read_csv("data/Final_Data/Clean_Data/VET_MH1_CLEAN.csv")
Rows: 27 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): veteran_status, mental_health_diagnosis_one
dbl (1): vet_count

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
vet_df <- vet_freq[rep(1:nrow(vet_freq), vet_freq$vet_count), -3]
write_csv(vet_df, file = "data/Final_Data/Clean_Data/VET_MH1_DF.csv")