library(tidyverse)
library(readr)
library(skimr)
library(scales)
Home Owners’ Loan Corporation (HOLC) Grades and their Relationship to Racial Demographics
Appendix to report
Data cleaning
Setup
Loaded the packages we need for collecting and cleaning the data.
Cleaning the Data
Below, we import the raw data into a new data frame and use separate() to split the metro_area column, whose values have the form "city, state", into two new columns containing the city and the state, respectively. We originally wanted to examine the relationship between different regions of the United States and the percentage of their population living within each HOLC grade, and we cleaned the data with that goal in mind. However, we later decided to focus on the relationship between HOLC grades and race, and ended up not using the region column that we created in our dataset.
# Read the raw metro-level HOLC data and split `metro_area` at its comma into
# separate city and state columns.
metro_grades <- read_csv("data/metro-grades.csv") |>
  separate(
    col = metro_area,
    into = c("metro_area_city", "metro_area_state"),
    # Also consume whitespace after the comma so that the state value does
    # not start with a leading space.
    sep = ",\\s*"
  )
Rows: 551 Columns: 28
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): metro_area, holc_grade
dbl (26): white_pop, black_pop, hisp_pop, asian_pop, other_pop, total_pop, p...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Add `metro_area_region`, derived from the state abbreviation(s) found in
# `metro_area_state`, and place it directly after that column.
# `case_when()` uses the first condition that matches, so a value containing
# codes from several regions is assigned the earliest region listed below;
# values matching none of the patterns become NA.
metro_grades <- metro_grades |>
  mutate(metro_area_region = case_when(
    str_detect(
      string = metro_area_state,
      # "RI" (Rhode Island) replaces the former "FI", which is not a state code.
      pattern = "ME|VT|NH|MA|CT|RI|NY|NJ|PA"
    ) ~ "Northeast",
    str_detect(
      string = metro_area_state,
      pattern = "ND|SD|NE|KS|MN|IA|MO|WI|IL|IN|MI|OH"
    ) ~ "Midwest",
    str_detect(
      string = metro_area_state,
      pattern = "DE|MD|VA|WV|NC|SC|GA|FL|KY|TN|AL|MS|AR|LA|OK|TX"
    ) ~ "South",
    str_detect(
      string = metro_area_state,
      pattern = "WA|OR|CA|MT|ID|WY|NV|UT|CO|AZ|NM|HI|AK"
    ) ~ "West"
  )) |>
  relocate(metro_area_region, .after = metro_area_state)
Summary Statistics of HOLC Grades by Race
Summary of HOLC Grade A:
# Summary statistics of the racial-composition percentage columns
# (pct_white through pct_other) for rows with HOLC grade "A".
metro_grades |>
  filter(holc_grade == "A") |>
  select(pct_white:pct_other) |>
  # The pipe already supplies the data; an extra positional argument here
  # would be passed on as summary()'s second parameter (`maxsum`).
  summary()
pct_white pct_black pct_hisp pct_asian
Min. :11.29 Min. : 0.310 Min. : 1.540 Min. : 0.310
1st Qu.:68.53 1st Qu.: 2.373 1st Qu.: 3.770 1st Qu.: 1.232
Median :77.54 Median : 5.105 Median : 5.380 Median : 1.920
Mean :73.78 Mean : 8.888 Mean : 8.949 Mean : 3.100
3rd Qu.:83.04 3rd Qu.:11.425 3rd Qu.:10.360 3rd Qu.: 3.902
Max. :94.12 Max. :65.930 Max. :74.850 Max. :17.700
pct_other
Min. : 1.780
1st Qu.: 4.110
Median : 5.085
Mean : 5.288
3rd Qu.: 6.138
Max. :13.280
Summary of HOLC Grade B:
# Summary statistics of the racial-composition percentage columns
# (pct_white through pct_other) for rows with HOLC grade "B".
metro_grades |>
  filter(holc_grade == "B") |>
  select(pct_white:pct_other) |>
  # The pipe already supplies the data; an extra positional argument here
  # would be passed on as summary()'s second parameter (`maxsum`).
  summary()
pct_white pct_black pct_hisp pct_asian
Min. : 6.63 Min. : 1.190 Min. : 1.600 Min. : 0.180
1st Qu.:50.03 1st Qu.: 6.638 1st Qu.: 4.840 1st Qu.: 1.062
Median :62.86 Median :12.390 Median : 7.825 Median : 2.100
Mean :59.94 Mean :16.613 Mean :14.273 Mean : 3.296
3rd Qu.:72.12 3rd Qu.:23.457 3rd Qu.:17.385 3rd Qu.: 4.325
Max. :90.97 Max. :76.270 Max. :90.770 Max. :31.390
pct_other
Min. : 1.040
1st Qu.: 4.625
Median : 5.590
Mean : 5.879
3rd Qu.: 6.990
Max. :15.460
Summary of HOLC Grade C:
# Summary statistics of the racial-composition percentage columns
# (pct_white through pct_other) for rows with HOLC grade "C".
metro_grades |>
  filter(holc_grade == "C") |>
  select(pct_white:pct_other) |>
  # The pipe already supplies the data; an extra positional argument here
  # would be passed on as summary()'s second parameter (`maxsum`).
  summary()
pct_white pct_black pct_hisp pct_asian
Min. : 6.99 Min. : 1.85 Min. : 1.83 Min. : 0.140
1st Qu.:33.69 1st Qu.: 8.70 1st Qu.: 6.33 1st Qu.: 0.950
Median :48.43 Median :19.96 Median :11.26 Median : 2.220
Mean :48.65 Mean :23.01 Mean :18.89 Mean : 3.427
3rd Qu.:63.34 3rd Qu.:31.32 3rd Qu.:27.13 3rd Qu.: 3.750
Max. :87.65 Max. :83.36 Max. :88.33 Max. :24.380
pct_other
Min. : 1.190
1st Qu.: 4.680
Median : 5.790
Mean : 6.025
3rd Qu.: 7.280
Max. :15.220
Summary of HOLC Grade D:
# Summary statistics of the racial-composition percentage columns
# (pct_white through pct_other) for rows with HOLC grade "D".
metro_grades |>
  filter(holc_grade == "D") |>
  select(pct_white:pct_other) |>
  # The pipe already supplies the data; an extra positional argument here
  # would be passed on as summary()'s second parameter (`maxsum`).
  summary()
pct_white pct_black pct_hisp pct_asian
Min. : 3.77 Min. : 1.21 Min. : 1.100 Min. : 0.090
1st Qu.:22.55 1st Qu.:11.90 1st Qu.: 5.965 1st Qu.: 0.635
Median :39.85 Median :28.44 Median :11.690 Median : 1.685
Mean :39.39 Mean :31.44 Mean :20.077 Mean : 3.104
3rd Qu.:53.08 3rd Qu.:43.30 3rd Qu.:28.152 3rd Qu.: 3.770
Max. :86.17 Max. :85.40 Max. :93.900 Max. :23.710
pct_other
Min. : 0.880
1st Qu.: 4.395
Median : 5.605
Mean : 5.981
3rd Qu.: 7.380
Max. :17.730