Playing Style Analysis by Country

Beginner · 10 min read · 18 views · Nov 27, 2025

Python Code

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Read the international style data set.
style_df = pd.read_csv("data/international_styles.csv")

# Metrics averaged per country. The order of this list is the column order of
# the aggregated table, and therefore of the PCA feature matrix below.
metric_columns = [
    "physicality_index",
    "speed_rating",
    "skill_rating",
    "defensive_zone_time",
    "offensive_zone_time",
    "neutral_zone_time",
    "dump_in_pct",
    "controlled_entry_pct",
]

# One row per country, each metric reduced to its mean and rounded to 2 dp.
style_metrics = (
    style_df
    .groupby("country")
    .agg({column: "mean" for column in metric_columns})
    .round(2)
)

print("Playing Style Metrics by Country:")
print(style_metrics)

# Standardize every metric so that differences in scale between columns do
# not dominate the principal components.
scaler = StandardScaler()
style_normalized = scaler.fit_transform(style_metrics)

# Reduce the standardized metrics to two principal components for plotting.
pca = PCA(n_components=2)
style_pca = pca.fit_transform(style_normalized)

# Visualization: a 2x2 dashboard of the per-country style metrics.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_pca, ax_speed), (ax_zone, ax_entry) = axes

# PCA plot: one point per country in the space of the first two components,
# with each axis labelled by the share of variance it explains.
ax_pca.scatter(style_pca[:, 0], style_pca[:, 1], s=100, alpha=0.6)
for country, (pc1, pc2) in zip(style_metrics.index, style_pca[:, :2]):
    ax_pca.annotate(country, (pc1, pc2), fontsize=8, alpha=0.8)
ax_pca.set_title("Playing Style Similarity (PCA)")
ax_pca.set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]:.1%})")
ax_pca.set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]:.1%})")
ax_pca.grid(True, alpha=0.3)

# Speed vs Physicality: marker area scales with the country's skill rating.
ax_speed.scatter(style_metrics["speed_rating"],
                 style_metrics["physicality_index"],
                 s=style_metrics["skill_rating"] * 10, alpha=0.6)
# Only the first 8 countries (in index order) are labelled to limit clutter.
for country in style_metrics.index[:8]:
    ax_speed.annotate(country,
                      (style_metrics.loc[country, "speed_rating"],
                       style_metrics.loc[country, "physicality_index"]),
                      fontsize=7)
ax_speed.set_title("Speed vs Physicality")
ax_speed.set_xlabel("Speed Rating")
ax_speed.set_ylabel("Physicality Index")
ax_speed.grid(True, alpha=0.3)

# Zone time comparison (stacked bars).
# NOTE(review): head(10) takes the first ten rows in index order; no ranking
# is applied here even though the title says "Top 10" -- confirm the intent.
zone_time_data = style_metrics[[
    "defensive_zone_time", "neutral_zone_time", "offensive_zone_time"
]].head(10)
zone_time_data.plot(kind="bar", stacked=True, ax=ax_zone)
ax_zone.set_title("Zone Time Distribution (Top 10 Countries)")
ax_zone.set_xlabel("Country")
ax_zone.set_ylabel("Time %")
# Legend entries follow the column order selected above.
ax_zone.legend(["Defensive", "Neutral", "Offensive"], loc="upper right")
ax_zone.tick_params(axis="x", rotation=45)

# Entry style comparison (grouped bars) for the first 12 rows in index order.
entry_comparison = style_metrics[["dump_in_pct", "controlled_entry_pct"]].head(12)
entry_comparison.plot(kind="bar", ax=ax_entry)
ax_entry.set_title("Entry Style: Dump-In vs Controlled")
ax_entry.set_xlabel("Country")
ax_entry.set_ylabel("Percentage")
# Legend entries follow the column order selected above.
ax_entry.legend(["Dump-In %", "Controlled Entry %"])
ax_entry.tick_params(axis="x", rotation=45)

plt.tight_layout()
# savefig does not create missing directories; make sure the target exists so
# a fresh checkout does not fail with FileNotFoundError.
os.makedirs("outputs", exist_ok=True)
plt.savefig("outputs/international_style_differences.png", dpi=300, bbox_inches="tight")
plt.show()

# Style clustering: split countries at the median physicality index. Values
# strictly above the median are "Physical"; everything else is "Skill-Based".
physicality = style_metrics["physicality_index"]
above_median = physicality > physicality.median()
style_metrics["style_type"] = np.where(above_median, "Physical", "Skill-Based")

print("\nStyle Classification:")
print(style_metrics["style_type"].value_counts())

R Code

library(tidyverse)
library(ggplot2)
library(gridExtra)
library(factoextra)

# Read the international style data set.
style_df <- read.csv("data/international_styles.csv")

# Mean that skips missing observations; used for every metric below.
mean_na_rm <- function(values) mean(values, na.rm = TRUE)

# One row per country, each metric reduced to its NA-skipping mean.
by_country <- group_by(style_df, country)
style_metrics <- summarise(
  by_country,
  physicality_index = mean_na_rm(physicality_index),
  speed_rating = mean_na_rm(speed_rating),
  skill_rating = mean_na_rm(skill_rating),
  defensive_zone_time = mean_na_rm(defensive_zone_time),
  offensive_zone_time = mean_na_rm(offensive_zone_time),
  neutral_zone_time = mean_na_rm(neutral_zone_time),
  dump_in_pct = mean_na_rm(dump_in_pct),
  controlled_entry_pct = mean_na_rm(controlled_entry_pct)
)

print("Playing Style Metrics by Country:")
print(style_metrics)

# PCA analysis: centre and scale every metric column (country is dropped
# because it is the identifier, not a feature), then label rows by country.
style_matrix <- scale(select(style_metrics, -country))
rownames(style_matrix) <- style_metrics$country

# The matrix is already standardized, so prcomp runs with its defaults.
pca_result <- prcomp(style_matrix)

# Visualization: four panels combined into a single 2x2 figure.

# PCA biplot: countries drawn as points, variable loadings labelled.
p1 <- fviz_pca_biplot(pca_result,
                      label = "var",
                      habillage = "none",
                      geom.ind = "point",
                      col.ind = "steelblue",
                      title = "Playing Style Similarity (PCA)")

# Speed vs physicality: point size follows skill rating. geom_text sets a
# constant size, which overrides the size aesthetic for the labels only.
p2 <- ggplot(style_metrics, aes(x = speed_rating, y = physicality_index,
                                 size = skill_rating, label = country)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_text(size = 3, vjust = -1, hjust = 0.5) +
  theme_minimal() +
  labs(title = "Speed vs Physicality", x = "Speed Rating", y = "Physicality Index")

# Zone time distribution
# NOTE(review): head(10) takes the first ten rows of style_metrics as they
# are; no ranking is applied here even though the title says "Top 10" --
# confirm the intent.
zone_time_data <- style_metrics %>%
  head(10) %>%
  select(country, defensive_zone_time, neutral_zone_time, offensive_zone_time) %>%
  pivot_longer(cols = -country, names_to = "zone", values_to = "time")

# breaks is given explicitly so each label is tied to its zone by position
# instead of relying on the scale's default break ordering.
p3 <- ggplot(zone_time_data, aes(x = country, y = time, fill = zone)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Zone Time Distribution (Top 10 Countries)",
       x = "Country", y = "Time %") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("defensive_zone_time" = "#d62728",
                                "neutral_zone_time" = "#ff7f0e",
                                "offensive_zone_time" = "#2ca02c"),
                    breaks = c("defensive_zone_time",
                               "neutral_zone_time",
                               "offensive_zone_time"),
                    labels = c("Defensive", "Neutral", "Offensive"))

# Entry style comparison (first 12 rows of style_metrics)
entry_data <- style_metrics %>%
  head(12) %>%
  select(country, dump_in_pct, controlled_entry_pct) %>%
  pivot_longer(cols = -country, names_to = "entry_type", values_to = "percentage")

# Without explicit breaks the scale orders the entry types alphabetically
# ("controlled_entry_pct" before "dump_in_pct"), so positional labels ended
# up attached to the wrong colour. Pinning breaks keeps each label with the
# entry type it describes.
p4 <- ggplot(entry_data, aes(x = country, y = percentage, fill = entry_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Entry Style: Dump-In vs Controlled",
       x = "Country", y = "Percentage") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = c("dump_in_pct" = "coral",
                                "controlled_entry_pct" = "steelblue"),
                    breaks = c("dump_in_pct", "controlled_entry_pct"),
                    labels = c("Dump-In %", "Controlled Entry %"))

# Combine plots (grid.arrange also draws the figure on the active device)
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# Make sure the output directory exists before saving; showWarnings = FALSE
# keeps this silent when the directory is already there.
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave("outputs/international_style_differences_r.png", combined_plot,
       width = 14, height = 10, dpi = 300)

# Style classification: countries strictly above the median physicality
# index are "Physical", the rest "Skill-Based".
# na.rm = TRUE keeps a single missing country mean from turning the median,
# and with it every classification, into NA; a country whose own index is
# missing still gets NA for style_type.
style_metrics <- style_metrics %>%
  mutate(
    style_type = ifelse(
      physicality_index > median(physicality_index, na.rm = TRUE),
      "Physical", "Skill-Based"
    )
  )

print("Style Classification:")
print(table(style_metrics$style_type))

Previous: IIHF World Championship Analysis

Next: Multi-League Projection Models

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.

Table of Contents

Playing Style Analysis by Country

Python Code

R Code

Test Your Knowledge

Discussion