IIHF World Championship Analysis

Beginner · 10 min read · 1 view · Nov 27, 2025

Python Code

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# Load World Championship data
wc_df = pd.read_csv("data/world_championship.csv")

# Tournament statistics by country: goals and shots are averaged over each
# country's rows, powerplay goals and wins are totalled.
country_stats = wc_df.groupby("country").agg({
    "goals_for": "mean",
    "goals_against": "mean",
    "shots_for": "mean",
    "shots_against": "mean",
    "powerplay_goals": "sum",
    "wins": "sum"
})

# Derive the differential from the unrounded means so that rounding error is
# not baked into the ranking or into metrics derived from these columns later.
country_stats["goal_differential"] = country_stats["goals_for"] - country_stats["goals_against"]
country_stats = country_stats.sort_values("goal_differential", ascending=False)

print("World Championship Country Statistics:")
# Round for display only; country_stats itself keeps full precision.
print(country_stats.round(2))

# Visualization: 2x2 grid of summary charts
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Goals for vs against; marker area scales with each country's total wins
axes[0,0].scatter(country_stats["goals_for"], country_stats["goals_against"],
                  s=country_stats["wins"]*50, alpha=0.6)
# Break-even reference line (goals for == goals against). It must reach the
# larger of the two axis maxima, otherwise it stops short of the points whose
# goals-against value exceeds the highest goals-for value.
diag_max = country_stats[["goals_for", "goals_against"]].max().max()
axes[0,0].plot([0, diag_max],
               [0, diag_max],
               "r--", alpha=0.5)
axes[0,0].set_title("Goals For vs Goals Against")
axes[0,0].set_xlabel("Goals For")
axes[0,0].set_ylabel("Goals Against")
axes[0,0].grid(True, alpha=0.3)

# Top countries by goal differential
top_countries = country_stats.nlargest(10, "goal_differential")
top_countries["goal_differential"].plot(kind="barh", ax=axes[0,1], color="green")
# barh draws the first row at the bottom; flip the axis so the best
# differential sits at the top of the chart, as a "Top 10" ranking reads.
axes[0,1].invert_yaxis()
axes[0,1].set_title("Top 10 Countries by Goal Differential")
axes[0,1].set_xlabel("Goal Differential")
axes[0,1].set_ylabel("Country")

# Shooting efficiency: average goals as a percentage of average shots
shot_ax = axes[1,0]
country_stats["shooting_pct"] = (
    country_stats["goals_for"].div(country_stats["shots_for"]).mul(100)
)
# Bar chart of the 12 highest shooting percentages, best first
best_shooters = country_stats["shooting_pct"].nlargest(12)
best_shooters.plot.bar(ax=shot_ax, color="steelblue")
shot_ax.set(title="Top 12 Shooting Percentage", xlabel="Country", ylabel="Shooting %")
shot_ax.tick_params(axis="x", rotation=45)

# Powerplay effectiveness: the 12 countries with the most total powerplay goals
pp_ax = axes[1,1]
pp_leaders = country_stats.nlargest(12, "powerplay_goals")
pp_leaders["powerplay_goals"].plot.bar(ax=pp_ax, color="orange")
pp_ax.set(title="Top Powerplay Teams", xlabel="Country", ylabel="Powerplay Goals")
pp_ax.tick_params(axis="x", rotation=45)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
os.makedirs("outputs", exist_ok=True)
plt.savefig("outputs/world_championship_analysis.png", dpi=300, bbox_inches="tight")
plt.show()

# Medal prediction model
# Heuristic per-row score: rewards goals scored and shot volume, penalises
# goals conceded.
wc_df["expected_points"] = (
    wc_df["goals_for"] * 0.4 +
    wc_df["shots_for"] * 0.02 -
    wc_df["goals_against"] * 0.3
)

# Rank countries rather than individual rows: wc_df can hold several rows per
# country, so taking the top rows directly could list the same country more
# than once. Average each country's rows first, then keep the best eight.
medal_contenders = (
    wc_df.groupby("country", as_index=False)[
        ["goals_for", "goals_against", "expected_points"]
    ]
    .mean()
    .nlargest(8, "expected_points")
)
print("\nMedal Contenders:")
print(medal_contenders)

R Code

library(tidyverse)
library(ggplot2)
library(gridExtra)

# Load World Championship data
wc_df <- read.csv("data/world_championship.csv")

# Country statistics: average the goal and shot columns and total the
# powerplay goals and wins for each country, ignoring missing values.
country_stats <- summarise(
  group_by(wc_df, country),
  across(
    c(goals_for, goals_against, shots_for, shots_against),
    function(x) mean(x, na.rm = TRUE)
  ),
  across(
    c(powerplay_goals, wins),
    function(x) sum(x, na.rm = TRUE)
  )
)

# Add the goal differential and rank countries from best to worst by it.
country_stats <- arrange(
  mutate(country_stats, goal_differential = goals_for - goals_against),
  desc(goal_differential)
)

print("World Championship Country Statistics:")
print(country_stats)

# Visualization
# Scatter of average goals scored vs conceded; point size encodes total wins
# and the dashed red diagonal marks where the two are equal.
p1 <- ggplot(
  data = country_stats,
  mapping = aes(x = goals_for, y = goals_against, size = wins)
) +
  geom_point(color = "steelblue", alpha = 0.6) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed", alpha = 0.5) +
  ggtitle("Goals For vs Goals Against") +
  xlab("Goals For") +
  ylab("Goals Against") +
  theme_minimal()

# Horizontal bar chart of the ten best goal differentials. slice_max() is the
# current replacement for the superseded top_n(); like top_n() it keeps ties
# at the cut-off. Rows with a missing differential are dropped first, which
# is what top_n() did implicitly. reorder() + coord_flip() puts the best
# country at the top.
p2 <- country_stats %>%
  filter(!is.na(goal_differential)) %>%
  slice_max(goal_differential, n = 10) %>%
  ggplot(aes(x = reorder(country, goal_differential), y = goal_differential)) +
  geom_col(fill = "green") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Top 10 Countries by Goal Differential", x = "Country", y = "Goal Differential")

# Shooting efficiency: average goals as a percentage of average shots
country_stats <- country_stats %>%
  mutate(shooting_pct = (goals_for / shots_for) * 100)

# Bar chart of the 12 highest shooting percentages. slice_max() replaces the
# superseded top_n() (ties at the cut-off are still kept; rows with a missing
# percentage are dropped first, as top_n() did implicitly). Bars are ordered
# from highest to lowest so the chart reads as a ranking.
p3 <- country_stats %>%
  filter(!is.na(shooting_pct)) %>%
  slice_max(shooting_pct, n = 12) %>%
  ggplot(aes(x = reorder(country, -shooting_pct), y = shooting_pct)) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  labs(title = "Top 12 Shooting Percentage", x = "Country", y = "Shooting %") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of the 12 countries with the most total powerplay goals.
# slice_max() replaces the superseded top_n() and, like it, keeps ties at the
# cut-off. Bars are ordered from highest to lowest so the chart reads as a
# ranking.
p4 <- country_stats %>%
  slice_max(powerplay_goals, n = 12) %>%
  ggplot(aes(x = reorder(country, -powerplay_goals), y = powerplay_goals)) +
  geom_col(fill = "orange") +
  theme_minimal() +
  labs(title = "Top Powerplay Teams", x = "Country", y = "Powerplay Goals") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Combine plots into a 2x2 grid; grid.arrange() draws the grid and returns
# the arranged object so it can also be written to disk.
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# Make sure the output folder exists before saving; ggsave() fails when the
# target directory is missing.
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave("outputs/world_championship_analysis_r.png", combined_plot, width = 14, height = 10, dpi = 300)

# Medal prediction
# Heuristic per-row score: rewards goals scored and shot volume, penalises
# goals conceded.
wc_df <- wc_df %>%
  mutate(expected_points = goals_for * 0.4 + shots_for * 0.02 - goals_against * 0.3)

# Rank countries rather than individual rows: wc_df can hold several rows per
# country, so taking the top rows directly could list the same country more
# than once. Average each country's rows first, then keep the best eight.
medal_contenders <- wc_df %>%
  group_by(country) %>%
  summarise(
    goals_for = mean(goals_for, na.rm = TRUE),
    goals_against = mean(goals_against, na.rm = TRUE),
    expected_points = mean(expected_points, na.rm = TRUE)
  ) %>%
  arrange(desc(expected_points)) %>%
  head(8)

print("Medal Contenders:")
print(medal_contenders)

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.