library(tidyverse)
library(lubridate)
library(readr)
# Top Video Games
## Appendix to report
## Data cleaning
# Read the titles/scores file and rename its columns: title and platform
# become game_title and game_platform (the keys that meta and meta_titles
# are later joined on), and metascore becomes meta_score.
meta_titles <- read_csv("data/meta-titles.csv") |>
  rename(
    game_title = title,
    game_platform = platform,
    meta_score = metascore
  )
Rows: 600 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): title, platform
dbl (2): user_score, metascore
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the per-game metadata file.
meta <- read_csv("data/meta.csv")
Rows: 600 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): title, platform, developer, date, genre, number_of_players, rating,...
dbl (1): critic_reviews
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# clean developer to get rid of extraneous text
meta$developer <- meta$developer |>
  str_replace_all("Developer: ", "")

# clean user_reviews to get rid of extraneous text
meta$user_reviews <- meta$user_reviews |>
  str_replace_all(" Ratings", "")

# parse the date strings with lubridate's month-day-year parser
meta$date <- meta$date |>
  mdy()

# clean genre to get rid of extraneous text
# (the parentheses are escaped because the pattern is a regular expression)
meta$genre <- meta$genre |>
  str_replace_all("Genre\\(s\\): ", "")
# Simplify the player counts, build the join keys, derive game_mode, and
# split genre into a list column (one element per genre).
meta <- meta |>
  mutate(
    # Collapse the raw player-count labels into plain count strings:
    # missing values become "0"; any label not listed is kept unchanged.
    number_of_players = case_when(
      number_of_players == "No Online Multiplayer" ~ "1",
      number_of_players == "1 Player" ~ "1",
      number_of_players == "2 Online" ~ "2",
      number_of_players == "4 Online" ~ "4",
      number_of_players == "8 Online" ~ "8",
      is.na(number_of_players) ~ "0",
      TRUE ~ number_of_players
    ),
    # create game_title in meta which we can join the csvs on:
    # lower-case, spaces to hyphens, then strip punctuation
    game_title = title |>
      tolower() |>
      str_replace_all(" ", "-") |>
      str_replace_all("[':]", "") |>
      str_replace_all("[.]", "") |>
      str_replace_all("-&", "") |>
      str_replace_all("[(]", "") |>
      str_replace_all("[)]", "") |>
      str_replace_all("-/", ""),
    # create game_platform which we can join on
    game_platform = platform |>
      tolower() |>
      str_replace_all(" ", "-"),
    # separate number of players into simplified categories; this reads the
    # number_of_players values recoded earlier in this same mutate() call
    game_mode = case_when(
      number_of_players == "1" ~ "Single Player",
      number_of_players == "0" ~ "Unknown",
      TRUE ~ "Multiplayer"
    ),
    # split the comma-separated genre string into a list column
    genre = str_split(genre, ", ")
  )
# Attach the scores from meta_titles to meta using the derived keys, then
# drop the keys since they were only needed for the join.
metacritic <- left_join(
  meta,
  meta_titles,
  by = c("game_title", "game_platform")
) |>
  select(!c(game_title, game_platform))

# Expand the genre list column to one row per game/genre pair and remove
# exact duplicate rows.
metacritic <- metacritic |>
  unnest(cols = genre) |>
  distinct()

# Save the cleaned, combined data set.
write.csv(metacritic, file = "data/metacritic.csv")