── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.6
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.1 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.2
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
Attaching package: 'maps'
The following object is masked from 'package:purrr':
map
FBI Crime Data Analysis
# Data cleaning and structuring
agencies_cleaned <- agencies %>%
mutate(
# Standardize agency type
agency_type = str_to_title(str_trim(replace_na(agency_type, "Unknown"))),
# Convert dates
nibrs_start_date = as.Date(nibrs_start_date),
nibrs_start_year = year(nibrs_start_date),
# Boolean indicator
is_nibrs = case_when(
is_nibrs %in%
c(TRUE, "TRUE", "True", "T", 1, "1", "Yes", "YES", "Y") ~ TRUE,
is_nibrs %in%
c(FALSE, "FALSE", "False", "F", 0, "0", "No", "NO", "N") ~ FALSE,
TRUE ~ NA
)
)
ori_dupes <- agencies_cleaned %>%
count(ori, sort = TRUE) %>%
filter(n > 1)
ori_dupes[1] ori n
<0 rows> (or 0-length row.names)
# Define political leaning (2020 Election)
dem_states <- c(
'AZ',
'CA',
'CO',
'CT',
'DE',
'DC',
'GA',
'HI',
'IL',
'ME',
'MD',
'MA',
'MI',
'MN',
'NV',
'NH',
'NJ',
'NM',
'NY',
'OR',
'PA',
'RI',
'VT',
'VA',
'WA',
'WI'
)
rep_states <- c(
'AL',
'AK',
'AR',
'FL',
'ID',
'IN',
'IA',
'KS',
'KY',
'LA',
'MS',
'MO',
'MT',
'NE',
'NC',
'ND',
'OH',
'OK',
'SC',
'SD',
'TN',
'TX',
'UT',
'WV',
'WY'
)
agencies_cleaned <- agencies_cleaned %>%
mutate(
state_political_leaning = case_when(
state_abbr %in% dem_states ~ "Democratic",
state_abbr %in% rep_states ~ "Republican",
TRUE ~ "Other"
)
)
setdiff(unique(na.omit(agencies_cleaned$state_abbr)), c(dem_states, rep_states))character(0)
# Q1 metrics
agencies_q1 <- agencies_cleaned %>%
filter(!is.na(state_abbr)) # avoid NA-state groups
# State baseline
state_baseline <- agencies_cleaned %>%
group_by(state_abbr) %>%
summarise(
state_overall_participation_rate = mean(is_nibrs, na.rm = TRUE),
.groups = "drop"
)
# Agency type metrics per state
q1_metrics <- agencies_q1 %>%
group_by(state_abbr, state, state_political_leaning, agency_type) %>%
summarise(
n_agencies = n(),
n_nibrs = sum(is_nibrs, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(pct_nibrs = n_nibrs / n_agencies) %>%
left_join(state_baseline, by = "state_abbr") %>%
mutate(participation_gap = pct_nibrs - state_overall_participation_rate) %>%
# Flag or filter small groups (threshold < 5 as per proposal)
mutate(is_reliable = n_agencies >= 5)
state_political_summary <- agencies_q1 %>%
group_by(state_political_leaning) %>%
summarise(
participation_rate = mean(is_nibrs, na.rm = TRUE),
n_agencies = n(),
n_states = n_distinct(state_abbr),
.groups = "drop"
)
state_political_summary# A tibble: 2 × 4
state_political_leaning participation_rate n_agencies n_states
<chr> <dbl> <int> <int>
1 Democratic 0.697 9339 25
2 Republican 0.871 9827 25
# Q2 metrics
# National median for adoption delay calculation
national_median <- agencies_cleaned %>%
filter(is_nibrs == TRUE) %>%
pull(nibrs_start_year) %>%
median(na.rm = TRUE)
# Trends (Yearly & Cumulative)
q2_trends <- agencies_cleaned %>%
filter(is_nibrs == TRUE, !is.na(nibrs_start_year)) %>%
group_by(agency_type, nibrs_start_year) %>%
summarise(n_new_adopters_year = n(), .groups = "drop") %>%
arrange(agency_type, nibrs_start_year) %>%
group_by(agency_type) %>%
mutate(cumulative_adopters_year = cumsum(n_new_adopters_year)) %>%
left_join(
agencies_cleaned %>%
group_by(agency_type) %>%
summarise(total_agencies_type = n()),
by = "agency_type"
) %>%
mutate(
cumulative_adoption_rate = cumulative_adopters_year / total_agencies_type
)
# Adoption Timing Summary
q2_summary <- agencies_cleaned %>%
filter(is_nibrs == TRUE, !is.na(nibrs_start_year)) %>%
group_by(agency_type) %>%
summarise(
median_adoption_year = median(nibrs_start_year),
mean_adoption_year = mean(nibrs_start_year),
interquartile_range = IQR(nibrs_start_year),
.groups = "drop"
) %>%
# Add "Time to 50% Cumulative Adoption"
left_join(
q2_trends %>%
filter(cumulative_adoption_rate >= 0.5) %>%
group_by(agency_type) %>%
summarise(time_to_50_pct_adoption = min(nibrs_start_year)),
by = "agency_type"
) %>%
mutate(adoption_delay = median_adoption_year - national_median)
q2_summary# A tibble: 8 × 6
agency_type median_adoption_year mean_adoption_year interquartile_range
<chr> <dbl> <dbl> <dbl>
1 City 2016 2011. 19
2 County 2013 2010. 20
3 Other 2017 2013. 16
4 Other State Agency 2004 2008. 12
5 State Police 1998 2006. 27
6 Tribal 2021 2020. 0
7 University Or Col… 2019 2013. 16
8 Unknown 2021 2021. 3
# ℹ 2 more variables: time_to_50_pct_adoption <dbl>, adoption_delay <dbl>
Introduction
Law enforcement agencies across the United States vary widely in size, jurisdiction, and reporting practices. Understanding how these agencies are distributed geographically and how broadly they participate in standardized crime reporting systems is important to interpret national crime data. In this report we wanted to explore a dataset of U.S. law enforcement agencies, examining patterns in agency type, location, and participation in the FBI’s National Incident-Based Reporting System (NIBRS) through a series of visualizations.
Question 1 <- Are certain types of law enforcement agencies systematically less likely to participate in NIBRS reporting, even within the same state?
Introduction
While the FBI’s National Incident-Based Reporting System (NIBRS) offers a standardized framework for tracking incidents, participation is far from universal. The question is whether certain types of agencies consistently fall behind, and whether that gap holds up even when comparing agencies operating under the same state policies.
If participation gaps persist within states, it points to something deeper than policy: structural differences in how certain kinds of agencies engage with standardized reporting. To investigate this, we draw on is_nibrs, agency_type, and state_abbr. With this we aim to see what agency types are systematically underrepresented and how complete and trustworthy US National crime data is.
Approach
To investigate whether certain agency types are systematically underrepresented in NIBRS, we built the analysis in three steps.
We started by calculating each agency type’s participation rate relative to its state’s overall rate, using
is_nibrsandagency_type, and summarized these within-state gaps in an aggregated dot plot. Point size encodes the share of states in which a given agency type falls below the state baseline, capturing both the magnitude and consistency of the gap across the country.We then brought in a choropleth map of overall participation rates by state using
state_abbr, to contextualize how much of the variation might still be driven by geography and state-level policy.Finally, we produced a faceted dot plot focusing on the eight most agency-dense states, where larger agency counts yield more reliable within-state estimates, to test whether the aggregate patterns hold locally or are smoothed-out artifacts of the national summary.
Analysis
# graph 1: how systematic is under-participation by agency type?
plot_systematic <- q1_metrics |>
filter(is_reliable == TRUE) |>
group_by(agency_type) |>
summarise(
avg_gap = mean(participation_gap, na.rm = TRUE),
med_gap = median(participation_gap, na.rm = TRUE),
pct_states_below = mean(participation_gap < 0, na.rm = TRUE),
n_states = n_distinct(state_abbr),
.groups = "drop"
) |>
# drop Unknown if you want a cleaner story
filter(agency_type != "Unknown") |>
arrange(avg_gap) |>
mutate(agency_type = fct_inorder(agency_type))
ggplot(plot_systematic, aes(x = avg_gap, y = agency_type)) +
geom_vline(
xintercept = 0,
linetype = "dashed",
color = "gray50",
linewidth = 0.4
) +
geom_point(aes(size = pct_states_below), alpha = 0.85) +
geom_segment(
aes(x = 0, xend = avg_gap, yend = agency_type),
color = "gray70"
) +
scale_x_continuous(labels = label_percent(accuracy = 1)) +
scale_size_continuous(
labels = label_percent(accuracy = 1),
range = c(2, 8),
name = "% of states below baseline"
) +
labs(
title = "Which agency types systematically lag NIBRS participation?",
subtitle = "Average within-state participation gap across states (reliable groups only)",
x = "Average participation gap vs state baseline",
y = "Agency Type"
) +
theme_minimal()
# graph 2: choropleth
state_map_data <- state_baseline |>
mutate(region = tolower(state.name[match(state_abbr, state.abb)]))
us_map <- map_data("state")
map_plot_data <- left_join(us_map, state_map_data, by = "region")
ggplot(
map_plot_data,
aes(long, lat, group = group, fill = state_overall_participation_rate)
) +
geom_polygon(color = "white", linewidth = 0.2) +
coord_fixed(1.3) +
scale_fill_viridis_c(
labels = label_percent(),
name = "Participation Rate"
) +
labs(
title = "State-level overall NIBRS participation rates",
x = NULL,
y = NULL
) +
theme_void()
# graph 3 (better): participation gap vs state baseline
top_states <- agencies_cleaned |>
count(state_abbr, sort = TRUE) |>
slice_head(n = 8) |>
pull(state_abbr)
plot_gap <- q1_metrics |>
filter(state_abbr %in% top_states, is_reliable == TRUE) |>
mutate(
agency_type = fct_reorder(
agency_type,
participation_gap,
.fun = median,
.desc = FALSE
)
)
ggplot(plot_gap, aes(x = participation_gap, y = agency_type)) +
geom_vline(
xintercept = 0,
linetype = "dashed",
linewidth = 0.4,
color = "gray50"
) +
geom_point(
aes(size = n_agencies, color = participation_gap > 0),
alpha = 0.85
) +
facet_wrap(~state_abbr, ncol = 2) +
scale_x_continuous(labels = label_percent(accuracy = 1)) +
scale_size_continuous(range = c(1.5, 6), breaks = c(5, 25, 100)) +
scale_color_manual(
values = c(`TRUE` = "#1f77b4", `FALSE` = "#d62728"),
guide = "none"
) +
labs(
title = "NIBRS participation gap by agency type (within-state)",
subtitle = "Each point shows agency-type participation minus the state's overall participation rate (top 8 states by agencies)",
x = "Participation gap vs state baseline",
y = "Agency Type",
size = "Agencies in group"
) +
theme_minimal()
Discussion
The aggregated dot plot reveals a consistent pattern: “Other”, “Other State Agency”, and State Police agencies participate at lower rates than the state baseline, while Tribal and County agencies tend to exceed it. Notably, the large dot sizes for the lagging categories indicate these gaps are not small-sample cases but widespread tendencies that hold across many states. One possible explanation is that catch-all agency types like “Other” lack the institutional infrastructure or federal pressure that more formally defined agencies face, making standardized reporting adoption slower and more uneven.
The choropleth map adds an important layer to this picture. Participation is largely a state-level story: most western states report near-universal adoption while Pennsylvania and Florida stand out as notable laggards. This confirms that state mandates and infrastructure play a significant role in driving overall participation rates, which is precisely why controlling for state was essential in the first place. Without that control, agency-type gaps could easily be mistaken for geographic ones.
The faceted chart then tests whether these patterns hold at the state level. Looking at the eight most agency-dense states, the negative gaps for State Police and “Other” categories visible in the aggregate do reappear, particularly in FL, NJ, and NY, giving credibility to the idea that these are structural rather than incidental patterns. At the same time, states like CA and IL show a messier picture where gaps are smaller and less consistent, suggesting that high agency density alone does not guarantee uniform adoption. The overall picture is that agency type shapes participation, but the strength of that relationship varies with the local policy and institutional environment each agency operates in.
Question 2 <- When did agencies adopt NIBRS reporting, and which agency types adopted later than others?
Introduction
The transition to NIBRS represents a fundamental shift in how the United States tracks crime, moving from aggregate counts to detailed, incident-level data. But this shift did not happen overnight, and it did not happen evenly. Some agencies signed on early, building a culture of standardized reporting long before it was required. Others waited, dragged their feet, or needed a federal push to move at all. This question is about mapping that journey: who were the pioneers, who were the procrastinators, and what does the timing of adoption tell us about the forces driving it?
To do this, we lean on nibrs_start_date to trace when agencies joined the system and agency_type to see whether certain kinds of agencies consistently moved faster or slower than others. We are particularly interested in whether the 2021 FBI federal mandate acted as a universal catalyst or simply formalized a divide that had been building for decades. For researchers relying on NIBRS data, these lags matter: an agency that only joined in 2021 left years of historical blind spots in the record.
Approach
To tell this story, we use two visualizations that each illuminate a different dimension of the adoption timeline.
The first is a cumulative line plot by agency type, tracking the share of agencies within each type that had joined NIBRS by any given year. Using cumulative rates rather than raw counts normalizes across agency types of different sizes, making adoption speed directly comparable. The slope of each line carries the story: a steep early rise signals a type that moved fast, while a line that only jumps around 2021 points to one that needed the federal mandate to act.
The second is a faceted area chart organized by state, shifting the lens from who lagged to where. Each state panel shows how its agencies accumulated NIBRS participation over time, with states ordered by median adoption year so that early adopters naturally rise to the top and late adopters fall to the bottom. The area format reinforces this visually, with slow-adopting states showing a long flat stretch before a sudden fill near the federal deadline.
Analysis
# 1. THE SECTOR STORY: MUNICIPAL LEADERS VS. SPECIALIZED LAGGARDS
q2_trends_clean <- q2_trends %>%
filter(total_agencies_type > 50, !agency_type %in% c("Unknown", "Other"))
ggplot(
q2_trends_clean,
aes(x = nibrs_start_year, y = cumulative_adoption_rate, color = agency_type)
) +
geom_line(size = 1.2) +
scale_y_continuous(labels = scales::percent_format(), expand = c(0, 0)) +
scale_color_brewer(palette = "Dark2") +
labs(
# The title states the primary finding
title = "The 2021 NIBRS Mandate Forced a Deadline Surge for Specialized Agencies",
# The subtitle explains the contrast and provides "how"
subtitle = "While Municipal and County police achieved steady adoption over a decade, Tribal and \nUniversity reporting remained stagnant until vertical spikes following the federal deadline.",
x = "Year",
y = "Total Adoption Rate",
color = "Agency Type"
) +
theme_minimal() +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold", size = 14),
panel.grid.major = element_line(color = "gray90"),
panel.grid.minor = element_blank()
)Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# 2. THE GEOGRAPHIC STORY: PIONEERS VS. PROCRASTINATORS
target_states <- c("VA", "MI", "WV", "CA", "NY", "FL", "TX", "GA", "IL")
primary_color <- "#2c7fb8"
state_story <- agencies_cleaned %>%
filter(
is_nibrs == TRUE,
!is.na(nibrs_start_year),
state_abbr %in% target_states
) %>%
group_by(state_abbr, nibrs_start_year) %>%
summarise(n_new = n(), .groups = "drop") %>%
group_by(state_abbr) %>%
mutate(cum_rate = cumsum(n_new) / sum(n_new)) %>%
# Reorder states by median adoption year so 'Pioneers' appear first and 'Procrastinators' last
mutate(state_abbr = fct_reorder(state_abbr, nibrs_start_year, .fun = median))
ggplot(state_story, aes(x = nibrs_start_year, y = cum_rate)) +
geom_area(fill = primary_color, alpha = 0.2) +
geom_line(color = primary_color, size = 1) +
# Vertical line marks the 2021 mandate to explain the sudden spikes in NY/CA/FL
geom_vline(
xintercept = 2021,
linetype = "dashed",
color = "gray50",
size = 0.5
) +
facet_wrap(~state_abbr, nrow = 3) +
scale_y_continuous(
labels = scales::percent_format(),
limits = c(0, 1),
expand = c(0, 0)
) +
labs(
# Narrative Title
title = "State Mandates and Federal Deadlines Erased the Geographic 'Reporting Gap'",
subtitle = "While early adopters like Virginia and Michigan reached full NIBRS participation decades ago,\nlaggards like California and New York only transitioned once the 2021 federal deadline arrived.",
x = "Year",
y = "Adoption Progress"
) +
theme_minimal() +
theme(
strip.text = element_text(face = "bold", size = 11),
plot.title = element_text(face = "bold", size = 16),
plot.subtitle = element_text(
size = 12,
color = "gray20",
margin = margin(b = 15)
),
panel.grid.major = element_line(color = "grey90"),
panel.grid.minor = element_blank(),
panel.spacing = unit(1.5, "lines")
)
Discussion
(1-3 paragraphs) In the Discussion section, interpret the results of your analysis. Identify any trends revealed (or not revealed) by the plots. Speculate about why the data looks the way it does.
Presentation
Our presentation can be found here.
Data
Johnson, B., & R4DS Online Learning Community. (2025). FBI Crime Data API — Law enforcement agencies [Data file]. TidyTuesday (Week 7, 2025).Retrieved February 11, 2026, from https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-02-18/readme.md
Federal Bureau of Investigation. (2025). Crime Data Explorer: Agency-level data [Data set]. U.S. Department of Justice. https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/docApi
References
Johnson, B., & R4DS Online Learning Community. (2025). FBI Crime Data API — Law enforcement agencies [Data file]. TidyTuesday (Week 7, 2025).Retrieved February 11, 2026, from https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-02-18/readme.md
Federal Bureau of Investigation. (2025). Crime Data Explorer: Agency-level data [Data set]. U.S. Department of Justice. https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/docApi