Interactive plots

Libraries

suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(htmlwidgets))

Data cleaning and Transformation

Reading in data

suppressMessages(df <- read_excel("Education Statistics Digest 2023-Statistical Tables.xlsx", sheet = "46"))
knitr::kable(df)
46 PERCENTAGE OF O-LEVEL STUDENTS WHO PROGRESSED TO POST-SECONDARY EDUCATION …2 …3 …4 …5 …6 …7 …8 …9 …10 …11 …12
Ethnic Group NA 2013.0 2014.0 2015.0 2016.0 2017.0 2018.0 2019.0 2020.0 2021 2022.0
Malay % 98.7 98.5 98.8 98.7 98.9 98.4 98.8 98.9 98.9 98.8
Chinese % 98.5 98.5 98.6 99.1 99.0 99.0 99.3 99.4 99.5 99.5
Indian % 97.5 97.8 97.4 98.3 98.5 97.3 98.3 98.5 98.5 98.7
Others % 90.7 91.3 91.6 92.7 93.2 92.6 94.5 93.8 94.1 94.7
Overall % 98.1 98.2 98.2 98.7 98.6 98.5 98.9 98.9 99 99.1
Note: 1) Figures include participation in Junior Colleges, Millennia Institute, Polytechnics, Institute of Technical Education (ITE), LASALLE College of the Arts, Nanyang Academy of Fine Arts and other private education institutions, and take into account of students who have left the country. From 2015 onwards, figures also include participation in Privately-Funded Schools and Foreign System Schools. NA NA NA NA NA NA NA NA NA NA
2) Figures for 2018 - 2022 are preliminary estimates as these cohorts have not been fully tracked. Data for 2022 may be under-estimates as admissions data for 2023 into private education institutions is not available yet. NA NA NA NA NA NA NA NA NA NA
NA NA NA NA NA NA NA NA NA NA Back to Content Page NA

Remove unnecessary rows and columns

df <- read_excel("Education Statistics Digest 2023-Statistical Tables.xlsx", sheet = "46", skip = 1, n_max = 5)
New names:
• `` -> `...2`
df <- df[ , -2]

Pitvot table and change Year datatype to Int

# Convert to long format
df_long <- df |>
  pivot_longer(cols = -`Ethnic Group`, names_to = "Year", values_to = "Percentage")

# Ensure Year is integer
df_long$Year <- as.integer(df_long$Year)

Check for mising values

sapply(df_long, function(x) sum(is.na(x)))
Ethnic Group         Year   Percentage 
           0            0            0 
knitr::kable(df_long)
Ethnic Group Year Percentage
Malay 2013 98.7
Malay 2014 98.5
Malay 2015 98.8
Malay 2016 98.7
Malay 2017 98.9
Malay 2018 98.4
Malay 2019 98.8
Malay 2020 98.9
Malay 2021 98.9
Malay 2022 98.8
Chinese 2013 98.5
Chinese 2014 98.5
Chinese 2015 98.6
Chinese 2016 99.1
Chinese 2017 99.0
Chinese 2018 99.0
Chinese 2019 99.3
Chinese 2020 99.4
Chinese 2021 99.5
Chinese 2022 99.5
Indian 2013 97.5
Indian 2014 97.8
Indian 2015 97.4
Indian 2016 98.3
Indian 2017 98.5
Indian 2018 97.3
Indian 2019 98.3
Indian 2020 98.5
Indian 2021 98.5
Indian 2022 98.7
Others 2013 90.7
Others 2014 91.3
Others 2015 91.6
Others 2016 92.7
Others 2017 93.2
Others 2018 92.6
Others 2019 94.5
Others 2020 93.8
Others 2021 94.1
Others 2022 94.7
Overall 2013 98.1
Overall 2014 98.2
Overall 2015 98.2
Overall 2016 98.7
Overall 2017 98.6
Overall 2018 98.5
Overall 2019 98.9
Overall 2020 98.9
Overall 2021 99.0
Overall 2022 99.1

Data Visualisation

Colour Palette

dark_color_palette <- c(
  "#009E73",  
  "#F5C710",  
  "#0072B2", 
  "#D55E00",  
  "#CC79A7"
)

Improvement 1

# Create the plot
p <- ggplot(df_long, aes(x = Year, y = Percentage, color = `Ethnic Group`, group = `Ethnic Group`)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Percentage of O-Level Students Who Progressed To Post-Secondary Education",
    x = "Year",
    y = "Percentage",
    color = "Ethnic Group") +
  scale_color_manual(values = dark_color_palette) +
  scale_x_continuous(breaks = unique(df_long$Year)) +
  theme_minimal()

# Convert to plotly object for interactivity
p_interactive <- ggplotly(p, tooltip = c("x", "y", "color"))

p_interactive <- p_interactive %>%
  layout(
    hoverlabel = list(
      font = list(
        size = 14,
        family = "Arial",
        color = "white"
      ),
      bordercolor = "black" 
    ),
    annotations = list(
      list(
        x = -0.05,
        y = -0.1,
        xref = 'paper',
        yref = 'paper',
        text = "Source: Ministry of Education Singapore",
        showarrow = FALSE,
        font = list(
          size = 12,
          family = "Arial",
          color = "black"
        ),
        align = "left"
      )
    )
  )

# Display the plot
p_interactive

Improvement 2

# Create the main plot with Plotly
main_plot <- plot_ly(
  data = df_long,
  x = ~Year,
  y = ~Percentage,
  color = ~`Ethnic Group`,
  colors = dark_color_palette,
  type = 'scatter',
  mode = 'lines+markers',
  text = ~paste("Year:", Year, "<br>Percentage:", Percentage, "<br>Group:", `Ethnic Group`),
  hoverinfo = "text"
) %>% layout(
  yaxis = list(
    range = c(0, 100),
    tickvals = c(0, 25, 50, 75, 100),
    title = "Percentage"
  ),
  xaxis = list(
    tickvals = unique(df_long$Year),  # Set the tick values to unique years
    ticktext = as.character(unique(df_long$Year)),
    title = "Year"
  ),
  title = "Percentage of O-Level Students Who Progressed to Post-Secondary Education",
  legend = list(title = list(text = "Ethnic Group")),
  annotations = list(
    list(
      x = 0,
      y = -0.2,
      xref = 'paper',
      yref = 'paper',
      text = "Source: Ministry of Education Singapore",
      showarrow = FALSE,
      font = list(
        size = 12,
        family = "Arial",
        color = "black"
      ),
      align = "left"
    )
  ),
  margin = list(b = 100)  # Increase the bottom margin
)

# Add custom JavaScript for click zoom into point
main_plot <- main_plot %>%
  htmlwidgets::onRender("
    function(el, x) {
      el.on('plotly_click', function(d) {
        var point = d.points[0];
        if (point) {
          var yValue = point.y;
          var yBinRange = 30; // Define the bin range for y-axis
          var xBinRange = 1.5; // Define the bin range for x-axis to show 5 years
          var update = {
            'xaxis.range': [point.x - xBinRange, point.x + xBinRange], // Show 5 years around the clicked point
            'yaxis.range': [yValue - yBinRange, yValue + yBinRange]
          };
          Plotly.relayout(el, update);
        }
      });
    }
  ")

main_plot