This report examines the impact of major disruptions—including the COVID-19 pandemic, significant public events and extreme weather—on train usage patterns in Sydney between 2020 and 2025. Using Opal card data and Bureau of Meteorology (BOM) records we analyze changes in passenger volumes, identify key trends and compare the magnitude and duration of different types of disruptions.
This part of the project uses three main datasets:
The COVID-19 pandemic significantly affected train usage across Sydney from 2020 to mid-2024. Using Opal card data, this analysis tracks changes in station entries and exits across all train lines during key phases: early COVID, Delta (2021) and post-COVID recovery. Major stations like Central and International Airport showed sharp declines, reflecting reduced commuting and travel. Interactive visuals reveal how usage dropped rapidly during lockdowns, especially on lines like T9 and CCN. While some stations rebounded strongly by 2023, others continue to lag. This section highlights how travel patterns shifted and which parts of the network were most impacted.
# DATA PREP
# ignore closed stations
stations_to_ignore <- c("Rosehill", "Camellia", "Rydalmere", "Dundas", "Telopea", "Carlingford")
ee <- ee %>%
filter(Station_Type %in% c("Train", "Metro Shared")) %>%
mutate(
Train_Station = gsub(" Station", "", Station),
Train_Station = gsub(" $", "", Train_Station),
TripNumber = as.numeric(ifelse(Trip == "Less than 50", 50, Trip)),
MonthYear = as.Date(paste0(MonthYear, "-01"))
) %>%
filter(!Train_Station %in% stations_to_ignore) %>%
mutate(
Phase = case_when(
MonthYear >= as.Date("2020-01-01") & MonthYear <= as.Date("2021-06-30") ~ "Early COVID",
MonthYear >= as.Date("2021-07-01") & MonthYear <= as.Date("2021-12-31") ~ "Delta COVID",
MonthYear >= as.Date("2022-01-01") ~ "Post COVID",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(Phase))
# load and merge station coordinates
stations <- stations %>%
filter(!duplicated(Train_Station)) %>%
filter(Train_Station %in% ee$Train_Station)
ee <- ee %>%
left_join(stations %>%
select(Train_Station, LAT, LONG), by = "Train_Station") %>%
filter(!is.na(LAT) & !is.na(LONG))
ee_sf <- st_as_sf(ee, coords = c("LONG", "LAT"), crs = 4326)
# load route shapefile
trains <- st_read(shp_path, quiet = TRUE) %>% st_transform(crs = 4326)
# summarize
station_phase_totals <- ee_sf %>%
st_drop_geometry() %>%
group_by(Train_Station, Phase) %>%
summarise(
Total_Entries = sum(TripNumber[Entry_Exit == "Entry"], na.rm = TRUE),
Total_Exits = sum(TripNumber[Entry_Exit == "Exit"], na.rm = TRUE),
.groups = "drop"
)
# % drop and recovery + AVG
station_trends <- station_phase_totals %>%
pivot_wider(
names_from = Phase,
values_from = c(Total_Entries, Total_Exits),
names_glue = "{.value}_{gsub(' ', '_', Phase)}"
) %>%
mutate(
Entry_Drop_Pct = round((Total_Entries_Early_COVID - Total_Entries_Delta_COVID) / Total_Entries_Early_COVID * 100, 1),
Entry_Recovery_Pct = round((Total_Entries_Post_COVID - Total_Entries_Delta_COVID) / Total_Entries_Delta_COVID * 100, 1),
Exit_Drop_Pct = round((Total_Exits_Early_COVID - Total_Exits_Delta_COVID) / Total_Exits_Early_COVID * 100, 1),
Exit_Recovery_Pct = round((Total_Exits_Post_COVID - Total_Exits_Delta_COVID) / Total_Exits_Delta_COVID * 100, 1),
Avg_Drop_Pct = round((Entry_Drop_Pct + Exit_Drop_Pct) / 2, 1),
Avg_Recovery_Pct = round((Entry_Recovery_Pct + Exit_Recovery_Pct) / 2, 1)
)
This code defines a function to plot all stations on a map color-coded by route with interactive tooltips showing drop and recovery statistics and a bar chart for each station. The map supports zooming.
Purpose:
To visualize the change in train station activity during different
phases of the pandemic (Early COVID 2020, Delta 2021, Post-COVID 2023)
across all train lines in the Sydney network.
# plot func with zoom
create_station_plot_base64 <- function(station_name) {
station_data <- station_phase_totals |> filter(Train_Station == station_name)
station_data$Phase <- factor(station_data$Phase, levels = c("Early COVID", "Delta COVID", "Post COVID"))
long_data <- station_data |>
pivot_longer(cols = c(Total_Entries, Total_Exits),
names_to = "Type", values_to = "Count") |>
mutate(Type = ifelse(Type == "Total_Entries", "Entries", "Exits"))
p <- ggplot(long_data, aes(x = Phase, y = Count, fill = Type)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = Count), position = position_dodge(0.9), vjust = -0.7, size = 3.2) +
scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
labs(title = station_name) +
theme_minimal() +
theme(
legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1),
plot.margin = margin(10, 10, 10, 10)
)
file_path <- tempfile(fileext = ".png")
ggsave(file_path, plot = p, width = 4, height = 3.5, dpi = 150)
encoded <- base64enc::dataURI(file = file_path, mime = "image/png")
return(paste0("<img src='", encoded, "' width='260'>"))
}
tooltip_df <- station_trends |>
mutate(
chart_img = sapply(Train_Station, create_station_plot_base64),
tooltip = paste0(
chart_img, "<br>",
"<b>Drop & Recovery:</b><br>",
"Average Drop (Early → Delta): ", Avg_Drop_Pct, "%<br>",
"Average Recovery (Delta → Post): ", Avg_Recovery_Pct, "%"
)
)
df <- ee_sf |>
filter(Entry_Exit == "Entry") |>
distinct(Train_Station, .keep_all = TRUE) |>
left_join(tooltip_df, by = "Train_Station")
# plot
plot_all_phases <- function() {
trains_proj <- st_transform(trains, crs = 3857)
trains <- st_read(shp_path, quiet = TRUE)
sydney_region <- ne_states(country = "Australia", returnclass = "sf") |> filter(name_en == "New South Wales")
sydney_proj <- st_transform(sydney_region, crs = 3857)
df_proj <- st_transform(df, crs = 3857)
bbox_syd <- st_bbox(c(xmin = 150.5, xmax = 151.35, ymin = -34.15, ymax = -33.35), crs = 4326)
bbox_proj <- st_transform(st_as_sfc(bbox_syd), crs = 3857)
gg <- ggplot() +
geom_sf(data = sydney_proj, fill = "gray95", color = "gray85") +
geom_sf(data = trains_proj, aes(color = route_shor), size = 0.6) +
geom_point_interactive(
data = df_proj,
aes(geometry = geometry, tooltip = tooltip),
stat = "sf_coordinates",
size = 1.4,
colour = "darkred"
) +
coord_sf(xlim = st_bbox(bbox_proj)[c("xmin", "xmax")],
ylim = st_bbox(bbox_proj)[c("ymin", "ymax")]) +
ggtitle("Sydney Train Station Usage – COVID Phases") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, face = "bold", size = 16))
girafe(
ggobj = gg,
options = list(
opts_tooltip(css = "background-color:white; color:#00274D; font-size:10px; padding:5px;"),
opts_zoom(max = 5)
)
)
}
plot_all_phases()