Top Video Games

Appendix to report

Data cleaning

library(tidyverse)
library(lubridate)
library(readr)
# Load the title-level score file. `title` and `platform` are renamed to
# `game_title` and `game_platform` (the keys later used to join with `meta`),
# and `metascore` is renamed to `meta_score`.
meta_titles <- rename(
  read_csv("data/meta-titles.csv"),
  game_title = title,
  game_platform = platform,
  meta_score = metascore
)
Rows: 600 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): title, platform
dbl (2): user_score, metascore

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the raw game metadata; the columns are cleaned in the steps that follow.
meta <- read_csv("data/meta.csv")
Rows: 600 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (8): title, platform, developer, date, genre, number_of_players, rating,...
dbl (1): critic_reviews

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Strip the label text that precedes/follows the values in the free-text
# columns, and parse the month-day-year date strings into dates.
meta <- meta |>
  mutate(
    developer = str_remove_all(developer, "Developer: "),
    user_reviews = str_remove_all(user_reviews, " Ratings"),
    date = mdy(date),
    genre = str_remove_all(genre, "Genre\\(s\\): ")
  )

# Recode the player counts and build the keys used to join with `meta_titles`.
meta <- meta |>
  mutate(
    # Collapse the known player-count labels to a count stored as text.
    # A missing label becomes "0"; any other label is kept unchanged.
    number_of_players = case_when(
      is.na(number_of_players) ~ "0",
      number_of_players %in% c("No Online Multiplayer", "1 Player") ~ "1",
      number_of_players == "2 Online" ~ "2",
      number_of_players == "4 Online" ~ "4",
      number_of_players == "8 Online" ~ "8",
      TRUE ~ number_of_players
    ),
    # Join key: lower-case title with spaces turned into hyphens and
    # punctuation removed. The removals run in this exact order.
    game_title = title |>
      tolower() |>
      str_replace_all(" ", "-") |>
      str_remove_all("[':]") |>
      str_remove_all("[.]") |>
      str_remove_all("-&") |>
      str_remove_all("[(]") |>
      str_remove_all("[)]") |>
      str_remove_all("-/"),
    # Join key: lower-case platform with spaces turned into hyphens.
    game_platform = str_replace_all(tolower(platform), " ", "-"),
    # Simplified category derived from the recoded player count above.
    game_mode = case_when(
      number_of_players == "0" ~ "Unknown",
      number_of_players == "1" ~ "Single Player",
      TRUE ~ "Multiplayer"
    ),
    # Split the comma-separated genre string into a list column.
    genre = str_split(genre, ", ")
  )

# Attach the scores from `meta_titles` to each game in `meta`, matching on the
# normalised title and platform keys. The keys are dropped afterwards because
# they are only needed for the join.
metacritic <- meta |>
  left_join(meta_titles, by = c("game_title", "game_platform")) |>
  select(!c(game_title, game_platform)) |>
  # `genre` is a list column here: expand it to one row per game/genre pair,
  # then drop rows that end up as exact duplicates.
  unnest(cols = genre) |>
  distinct()

# row.names = FALSE stops write.csv() from adding an unnamed row-index column,
# which would otherwise reappear as a spurious extra column when the file is
# read back in.
write.csv(metacritic, file = "data/metacritic.csv", row.names = FALSE)

Other appendices (as necessary)