Playing Style Analysis by Country
Beginner
10 min read
18 views
Nov 27, 2025
Python Code
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Load international style data
style_df = pd.read_csv("data/international_styles.csv")

# Metrics to average per country; the list order is the column order of
# the resulting table.
metric_columns = [
    "physicality_index",
    "speed_rating",
    "skill_rating",
    "defensive_zone_time",
    "offensive_zone_time",
    "neutral_zone_time",
    "dump_in_pct",
    "controlled_entry_pct",
]

# Calculate style metrics: one row per country, each metric averaged and
# rounded to two decimals.
country_groups = style_df.groupby("country")
mean_spec = {column: "mean" for column in metric_columns}
style_metrics = country_groups.agg(mean_spec).round(2)

print("Playing Style Metrics by Country:")
print(style_metrics)
# Normalize for PCA: standardize every metric column so that differences in
# scale between metrics do not dominate the principal components.
scaler = StandardScaler()
scaler.fit(style_metrics)
style_normalized = scaler.transform(style_metrics)

# PCA analysis: keep the first two components for the 2-D similarity plot.
pca = PCA(n_components=2)
style_pca = pca.fit_transform(style_normalized)
# Visualization: a 2x2 grid of panels built from the country-level metrics.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# PCA plot: one point per country in the space of the first two components.
axes[0, 0].scatter(style_pca[:, 0], style_pca[:, 1], s=100, alpha=0.6)
for i, country in enumerate(style_metrics.index):
    # Row i of style_pca was computed from row i of style_metrics, so the
    # positional index lines each label up with its point.
    axes[0, 0].annotate(country, (style_pca[i, 0], style_pca[i, 1]),
                        fontsize=8, alpha=0.8)
axes[0, 0].set_title("Playing Style Similarity (PCA)")
# Axis labels report the share of variance explained by each component.
axes[0, 0].set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]:.1%})")
axes[0, 0].set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]:.1%})")
axes[0, 0].grid(True, alpha=0.3)

# Speed vs Physicality: marker area scales with the skill rating.
axes[0, 1].scatter(style_metrics["speed_rating"],
                   style_metrics["physicality_index"],
                   s=style_metrics["skill_rating"] * 10, alpha=0.6)
# Only the first eight countries are labelled to keep the panel readable.
for country in style_metrics.index[:8]:
    axes[0, 1].annotate(country,
                        (style_metrics.loc[country, "speed_rating"],
                         style_metrics.loc[country, "physicality_index"]),
                        fontsize=7)
axes[0, 1].set_title("Speed vs Physicality")
axes[0, 1].set_xlabel("Speed Rating")
axes[0, 1].set_ylabel("Physicality Index")
axes[0, 1].grid(True, alpha=0.3)

# Zone time comparison
# NOTE(review): head(10) takes the first ten rows of style_metrics in index
# order, not a ranking by any metric -- confirm "Top 10" is the intended title.
zone_time_data = style_metrics[[
    "defensive_zone_time", "neutral_zone_time", "offensive_zone_time"
]].head(10)
zone_time_data.plot(kind="bar", stacked=True, ax=axes[1, 0])
axes[1, 0].set_title("Zone Time Distribution (Top 10 Countries)")
axes[1, 0].set_xlabel("Country")
axes[1, 0].set_ylabel("Time %")
# Legend entries follow the column order selected above.
axes[1, 0].legend(["Defensive", "Neutral", "Offensive"], loc="upper right")
axes[1, 0].tick_params(axis="x", rotation=45)

# Entry style comparison (first twelve rows of style_metrics)
entry_comparison = style_metrics[["dump_in_pct", "controlled_entry_pct"]].head(12)
entry_comparison.plot(kind="bar", ax=axes[1, 1])
axes[1, 1].set_title("Entry Style: Dump-In vs Controlled")
axes[1, 1].set_xlabel("Country")
axes[1, 1].set_ylabel("Percentage")
# Legend entries follow the column order selected above.
axes[1, 1].legend(["Dump-In %", "Controlled Entry %"])
axes[1, 1].tick_params(axis="x", rotation=45)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
os.makedirs("outputs", exist_ok=True)
plt.savefig("outputs/international_style_differences.png", dpi=300, bbox_inches="tight")
plt.show()
# Style clustering: countries whose physicality index is above the median
# are tagged "Physical"; all others are tagged "Skill-Based".
physicality = style_metrics["physicality_index"]
above_median = physicality > physicality.median()
style_metrics["style_type"] = np.where(above_median, "Physical", "Skill-Based")

print("\nStyle Classification:")
print(style_metrics["style_type"].value_counts())
R Code
library(tidyverse)
library(ggplot2)
library(gridExtra)
library(factoextra)
# Load international style data
style_df <- read.csv("data/international_styles.csv")

# Mean that ignores missing values; applied to every metric below.
mean_no_na <- function(x) mean(x, na.rm = TRUE)

# Calculate style metrics: one row per country with the average of each
# metric.
style_by_country <- group_by(style_df, country)
style_metrics <- summarise(
  style_by_country,
  physicality_index = mean_no_na(physicality_index),
  speed_rating = mean_no_na(speed_rating),
  skill_rating = mean_no_na(skill_rating),
  defensive_zone_time = mean_no_na(defensive_zone_time),
  offensive_zone_time = mean_no_na(offensive_zone_time),
  neutral_zone_time = mean_no_na(neutral_zone_time),
  dump_in_pct = mean_no_na(dump_in_pct),
  controlled_entry_pct = mean_no_na(controlled_entry_pct)
)

print("Playing Style Metrics by Country:")
print(style_metrics)
# PCA analysis
# Drop the country label, then centre and scale every metric column.
metric_columns <- select(style_metrics, -country)
style_matrix <- scale(metric_columns)

# Name the matrix rows by country so each PCA individual keeps its label.
rownames(style_matrix) <- style_metrics$country

pca_result <- prcomp(style_matrix)
# Visualization
# Panel 1: PCA biplot. Countries are drawn as points and only the variables
# are labelled.
p1 <- fviz_pca_biplot(
  pca_result,
  geom.ind = "point",
  col.ind = "steelblue",
  label = "var",
  habillage = "none",
  title = "Playing Style Similarity (PCA)"
)

# Panel 2: speed against physicality, with point size mapped to skill rating
# and every point labelled with its country.
speed_physicality_aes <- aes(
  x = speed_rating,
  y = physicality_index,
  size = skill_rating,
  label = country
)
p2 <- ggplot(style_metrics, speed_physicality_aes) +
  geom_point(color = "steelblue", alpha = 0.6) +
  # A fixed text size here overrides the size mapping for the labels.
  geom_text(size = 3, hjust = 0.5, vjust = -1) +
  labs(
    title = "Speed vs Physicality",
    x = "Speed Rating",
    y = "Physicality Index"
  ) +
  theme_minimal()
# Zone time distribution
# Take the first ten rows of style_metrics and reshape the three zone-time
# columns into long form (one row per country/zone pair) for stacking.
zone_columns <- select(
  head(style_metrics, 10),
  country, defensive_zone_time, neutral_zone_time, offensive_zone_time
)
zone_time_data <- pivot_longer(
  zone_columns,
  cols = -country,
  names_to = "zone",
  values_to = "time"
)

# Fill colours keyed by the original column names.
zone_colours <- c(
  "defensive_zone_time" = "#d62728",
  "neutral_zone_time" = "#ff7f0e",
  "offensive_zone_time" = "#2ca02c"
)

p3 <- ggplot(zone_time_data, aes(x = country, y = time, fill = zone)) +
  geom_bar(stat = "identity") +
  # The labels are positional; they line up because the zone names sort
  # alphabetically as defensive, neutral, offensive.
  scale_fill_manual(
    values = zone_colours,
    labels = c("Defensive", "Neutral", "Offensive")
  ) +
  labs(
    title = "Zone Time Distribution (Top 10 Countries)",
    x = "Country",
    y = "Time %"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Entry style comparison
# Take the first twelve rows of style_metrics and reshape the two entry
# columns into long form (one row per country/entry-type pair).
entry_data <- style_metrics %>%
  head(12) %>%
  select(country, dump_in_pct, controlled_entry_pct) %>%
  pivot_longer(
    cols = -country,
    names_to = "entry_type",
    values_to = "percentage"
  )

p4 <- ggplot(entry_data, aes(x = country, y = percentage, fill = entry_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(
    title = "Entry Style: Dump-In vs Controlled",
    x = "Country",
    y = "Percentage"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Unnamed labels are matched to breaks by position, and the default breaks
  # are sorted alphabetically ("controlled_entry_pct" before "dump_in_pct").
  # Without explicit breaks the two legend labels were swapped, so the breaks
  # are pinned to the same order as the labels.
  scale_fill_manual(
    values = c(
      "dump_in_pct" = "coral",
      "controlled_entry_pct" = "steelblue"
    ),
    breaks = c("dump_in_pct", "controlled_entry_pct"),
    labels = c("Dump-In %", "Controlled Entry %")
  )
# Combine plots: arrange the four panels in a 2x2 grid. grid.arrange() draws
# the grid and returns it so it can be saved below.
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# Make sure the target folder exists so the save does not fail when
# "outputs" is missing; showWarnings = FALSE keeps this quiet when the
# folder is already there.
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave("outputs/international_style_differences_r.png", combined_plot,
       width = 14, height = 10, dpi = 300)
# Style classification: countries whose physicality index is above the median
# are tagged "Physical"; the rest are tagged "Skill-Based".
# The per-country means are computed with na.rm = TRUE and are NaN for a
# country with no recorded values. The median must skip those as well,
# otherwise it is NA and every country would be classified as NA.
style_metrics <- style_metrics %>%
  mutate(
    style_type = ifelse(
      physicality_index > median(physicality_index, na.rm = TRUE),
      "Physical",
      "Skill-Based"
    )
  )
print("Style Classification:")
print(table(style_metrics$style_type))
Discussion
Have questions or feedback? Join our community discussion on
Discord or
GitHub Discussions.
Table of Contents
Related Topics
Quick Actions