Playing Style Analysis by Country
Beginner
10 min read
1 view
Nov 27, 2025
Python Code
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Read the raw international style observations
style_df = pd.read_csv("data/international_styles.csv")

# Per-country mean of every style metric, rounded to two decimals
metric_columns = [
    "physicality_index",
    "speed_rating",
    "skill_rating",
    "defensive_zone_time",
    "offensive_zone_time",
    "neutral_zone_time",
    "dump_in_pct",
    "controlled_entry_pct",
]
style_metrics = (
    style_df
    .groupby("country")
    .agg({column: "mean" for column in metric_columns})
    .round(2)
)
print("Playing Style Metrics by Country:", style_metrics, sep="\n")

# Standardize the metrics so each one contributes on the same scale to the PCA
scaler = StandardScaler()
style_normalized = scaler.fit_transform(style_metrics)

# Project the standardized metrics onto the first two principal components
pca = PCA(n_components=2)
style_pca = pca.fit_transform(style_normalized)
# Visualization: a 2x2 grid of panels comparing national playing styles
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Panel (0, 0): every country placed on the first two principal components
axes[0, 0].scatter(style_pca[:, 0], style_pca[:, 1], s=100, alpha=0.6)
for i, country in enumerate(style_metrics.index):
    # Rows of style_pca follow the row order of style_metrics
    axes[0, 0].annotate(country, (style_pca[i, 0], style_pca[i, 1]),
                        fontsize=8, alpha=0.8)
axes[0, 0].set_title("Playing Style Similarity (PCA)")
axes[0, 0].set_xlabel(f"PC1 ({pca.explained_variance_ratio_[0]:.1%})")
axes[0, 0].set_ylabel(f"PC2 ({pca.explained_variance_ratio_[1]:.1%})")
axes[0, 0].grid(True, alpha=0.3)

# Panel (0, 1): speed vs physicality, marker area scaled by skill rating
axes[0, 1].scatter(style_metrics["speed_rating"],
                   style_metrics["physicality_index"],
                   s=style_metrics["skill_rating"] * 10, alpha=0.6)
# Only the first eight countries are labelled to limit overlapping text
for country in style_metrics.index[:8]:
    axes[0, 1].annotate(country,
                        (style_metrics.loc[country, "speed_rating"],
                         style_metrics.loc[country, "physicality_index"]),
                        fontsize=7)
axes[0, 1].set_title("Speed vs Physicality")
axes[0, 1].set_xlabel("Speed Rating")
axes[0, 1].set_ylabel("Physicality Index")
axes[0, 1].grid(True, alpha=0.3)

# Panel (1, 0): stacked zone-time shares.
# NOTE: head(10) takes the first 10 rows of style_metrics in index order;
# it is not a ranking, despite the "Top 10" wording in the title.
zone_time_data = style_metrics[[
    "defensive_zone_time", "neutral_zone_time", "offensive_zone_time"
]].head(10)
zone_time_data.plot(kind="bar", stacked=True, ax=axes[1, 0])
axes[1, 0].set_title("Zone Time Distribution (Top 10 Countries)")
axes[1, 0].set_xlabel("Country")
axes[1, 0].set_ylabel("Time %")
# Legend entries follow the column order selected above
axes[1, 0].legend(["Defensive", "Neutral", "Offensive"], loc="upper right")
axes[1, 0].tick_params(axis="x", rotation=45)

# Panel (1, 1): dump-in vs controlled zone entries for the first 12 rows
entry_comparison = style_metrics[["dump_in_pct", "controlled_entry_pct"]].head(12)
entry_comparison.plot(kind="bar", ax=axes[1, 1])
axes[1, 1].set_title("Entry Style: Dump-In vs Controlled")
axes[1, 1].set_xlabel("Country")
axes[1, 1].set_ylabel("Percentage")
# Legend entries follow the column order selected above
axes[1, 1].legend(["Dump-In %", "Controlled Entry %"])
axes[1, 1].tick_params(axis="x", rotation=45)

plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
output_path = Path("outputs/international_style_differences.png")
output_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.show()
# Style classification: a median split on physicality, where countries
# strictly above the median are "Physical" and all others are "Skill-Based"
physicality = style_metrics["physicality_index"]
above_median = physicality > physicality.median()
style_metrics["style_type"] = np.where(above_median, "Physical", "Skill-Based")
print("\nStyle Classification:")
print(style_metrics["style_type"].value_counts())
R Code
library(tidyverse)
library(ggplot2)
library(gridExtra)
library(factoextra)
# Read the raw international style observations
style_df <- read.csv("data/international_styles.csv")

# Mean that ignores missing values, shared by every metric below
avg <- function(x) mean(x, na.rm = TRUE)

# Per-country average of each style metric
style_metrics <- summarise(
  group_by(style_df, country),
  physicality_index = avg(physicality_index),
  speed_rating = avg(speed_rating),
  skill_rating = avg(skill_rating),
  defensive_zone_time = avg(defensive_zone_time),
  offensive_zone_time = avg(offensive_zone_time),
  neutral_zone_time = avg(neutral_zone_time),
  dump_in_pct = avg(dump_in_pct),
  controlled_entry_pct = avg(controlled_entry_pct)
)
print("Playing Style Metrics by Country:")
print(style_metrics)

# PCA on the standardized metrics; countries become the matrix row names
style_matrix <- scale(select(style_metrics, -country))
rownames(style_matrix) <- style_metrics$country
pca_result <- prcomp(style_matrix)
# Visualization ----

# PCA biplot: countries drawn as points, only the metric variables labelled
p1 <- fviz_pca_biplot(
  pca_result,
  label = "var",
  habillage = "none",
  geom.ind = "point",
  col.ind = "steelblue",
  title = "Playing Style Similarity (PCA)"
)

# Speed vs physicality, point size mapped to skill rating
p2 <- ggplot(
  style_metrics,
  aes(x = speed_rating, y = physicality_index,
      size = skill_rating, label = country)
) +
  geom_point(alpha = 0.6, color = "steelblue") +
  # Fixed text size here overrides the size aesthetic for the labels
  geom_text(size = 3, vjust = -1, hjust = 0.5) +
  theme_minimal() +
  labs(title = "Speed vs Physicality",
       x = "Speed Rating", y = "Physicality Index")

# Zone time distribution
# NOTE: head(10) takes the first 10 rows of style_metrics in row order;
# it is not a ranking, despite the "Top 10" wording in the title.
zone_time_data <- style_metrics %>%
  head(10) %>%
  select(country, defensive_zone_time, neutral_zone_time,
         offensive_zone_time) %>%
  pivot_longer(cols = -country, names_to = "zone", values_to = "time")

p3 <- ggplot(zone_time_data, aes(x = country, y = time, fill = zone)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "Zone Time Distribution (Top 10 Countries)",
       x = "Country", y = "Time %") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Labels are named so each one is matched to its break by name rather
  # than by position in the (alphabetically ordered) breaks
  scale_fill_manual(
    values = c("defensive_zone_time" = "#d62728",
               "neutral_zone_time" = "#ff7f0e",
               "offensive_zone_time" = "#2ca02c"),
    labels = c("defensive_zone_time" = "Defensive",
               "neutral_zone_time" = "Neutral",
               "offensive_zone_time" = "Offensive")
  )

# Entry style comparison (first 12 rows of style_metrics)
entry_data <- style_metrics %>%
  head(12) %>%
  select(country, dump_in_pct, controlled_entry_pct) %>%
  pivot_longer(cols = -country, names_to = "entry_type",
               values_to = "percentage")

p4 <- ggplot(entry_data,
             aes(x = country, y = percentage, fill = entry_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Entry Style: Dump-In vs Controlled",
       x = "Country", y = "Percentage") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Named labels are required here: the breaks sort alphabetically
  # ("controlled_entry_pct" before "dump_in_pct"), so unnamed labels given
  # in dump-in/controlled order would be attached to the wrong colours.
  scale_fill_manual(
    values = c("dump_in_pct" = "coral",
               "controlled_entry_pct" = "steelblue"),
    labels = c("dump_in_pct" = "Dump-In %",
               "controlled_entry_pct" = "Controlled Entry %")
  )
# Combine the four panels into a 2-column grid; grid.arrange() also draws
# the layout on the current graphics device
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# Make sure the output directory exists before saving; this is a no-op when
# it is already there
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave("outputs/international_style_differences_r.png", combined_plot,
       width = 14, height = 10, dpi = 300)
# Style classification: median split on physicality. Countries strictly
# above the median are "Physical"; all others are "Skill-Based".
# na.rm = TRUE keeps one missing country average from turning the median,
# and with it every classification, into NA. This matches the na.rm = TRUE
# means used to build style_metrics.
style_metrics <- style_metrics %>%
  mutate(
    style_type = ifelse(
      physicality_index > median(physicality_index, na.rm = TRUE),
      "Physical", "Skill-Based"
    )
  )
print("Style Classification:")
print(table(style_metrics$style_type))
Discussion
Have questions or feedback? Join our community discussion on
Discord or
GitHub Discussions.
Table of Contents
Related Topics
Quick Actions