Playing Style Analysis by Country

Beginner 10 min read 0 views Nov 27, 2025

Python Code

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load international style data
style_df = pd.read_csv("data/international_styles.csv")

# Calculate style metrics: the per-country mean of every style column,
# rounded to two decimals. Columns keep the order listed here.
metric_columns = [
    "physicality_index",
    "speed_rating",
    "skill_rating",
    "defensive_zone_time",
    "offensive_zone_time",
    "neutral_zone_time",
    "dump_in_pct",
    "controlled_entry_pct",
]
style_metrics = style_df.groupby("country")[metric_columns].mean().round(2)

print("Playing Style Metrics by Country:")
print(style_metrics)

# Normalize for PCA
# The scaler is fitted on the per-country metrics table (one row per country,
# one column per metric) and returns the scaled values used as PCA input.
scaler = StandardScaler()
style_normalized = scaler.fit_transform(style_metrics)

# PCA analysis
# Keep two components so every country gets one (PC1, PC2) coordinate pair;
# rows of style_pca are in the same order as style_metrics.index.
pca = PCA(n_components=2)
style_pca = pca.fit_transform(style_normalized)

# Visualization: one figure holding a 2x2 grid of panels
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# PCA plot (top-left panel): one labelled point per country
pca_ax = axes[0, 0]
pc1_share, pc2_share = pca.explained_variance_ratio_[:2]
pca_ax.scatter(style_pca[:, 0], style_pca[:, 1], s=100, alpha=0.6)
for country, (pc1, pc2) in zip(style_metrics.index, style_pca):
    pca_ax.annotate(country, (pc1, pc2), fontsize=8, alpha=0.8)
pca_ax.set(
    title="Playing Style Similarity (PCA)",
    # Axis labels report the share of variance each component explains
    xlabel=f"PC1 ({pc1_share:.1%})",
    ylabel=f"PC2 ({pc2_share:.1%})",
)
pca_ax.grid(True, alpha=0.3)

# Speed vs Physicality (top-right panel)
speed_ax = axes[0, 1]
speed_ax.scatter(
    style_metrics["speed_rating"],
    style_metrics["physicality_index"],
    s=style_metrics["skill_rating"] * 10,  # marker size follows skill rating
    alpha=0.6,
)
# Only the first 8 countries (in index order) get a text label
labelled = style_metrics[["speed_rating", "physicality_index"]].head(8)
for country, speed, physicality in labelled.itertuples():
    speed_ax.annotate(country, (speed, physicality), fontsize=7)
speed_ax.set(
    title="Speed vs Physicality",
    xlabel="Speed Rating",
    ylabel="Physicality Index",
)
speed_ax.grid(True, alpha=0.3)

# Zone time comparison (bottom-left panel)
# Stacked bars for the first 10 rows of style_metrics: this is index order,
# not a ranking by any metric.
zone_columns = ["defensive_zone_time", "neutral_zone_time", "offensive_zone_time"]
zone_time_data = style_metrics.head(10)[zone_columns]
zone_ax = axes[1, 0]
zone_time_data.plot.bar(stacked=True, ax=zone_ax)
zone_ax.set(
    title="Zone Time Distribution (Top 10 Countries)",
    xlabel="Country",
    ylabel="Time %",
)
# Legend entries follow the column order of zone_columns
zone_ax.legend(["Defensive", "Neutral", "Offensive"], loc="upper right")
zone_ax.tick_params(axis="x", rotation=45)

# Entry style comparison (bottom-right panel)
# Side-by-side bars for the first 12 rows of style_metrics (index order)
entry_columns = ["dump_in_pct", "controlled_entry_pct"]
entry_comparison = style_metrics.head(12)[entry_columns]
entry_ax = axes[1, 1]
entry_comparison.plot.bar(ax=entry_ax)
entry_ax.set(
    title="Entry Style: Dump-In vs Controlled",
    xlabel="Country",
    ylabel="Percentage",
)
# Legend entries follow the column order of entry_columns
entry_ax.legend(["Dump-In %", "Controlled Entry %"])
entry_ax.tick_params(axis="x", rotation=45)

# Finalize the layout, write the figure to disk, then display it.
plt.tight_layout()

# Create the target directory first: savefig does not create missing parent
# directories and would otherwise raise after all the plotting work is done.
output_path = "outputs/international_style_differences.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.show()

# Style clustering
# Split countries at the median physicality index: strictly above the median
# is "Physical", everything else is "Skill-Based".
physicality = style_metrics["physicality_index"]
is_physical = physicality > physicality.median()
style_metrics["style_type"] = np.where(is_physical, "Physical", "Skill-Based")

print("\nStyle Classification:")
print(style_metrics["style_type"].value_counts())

R Code

library(tidyverse)
library(ggplot2)
library(gridExtra)
library(factoextra)

# Load international style data
style_df <- read.csv("data/international_styles.csv")

# Calculate style metrics: one row per country holding the NA-ignoring mean
# of every style column. Output columns keep the order listed here.
metric_cols <- c(
  "physicality_index", "speed_rating", "skill_rating",
  "defensive_zone_time", "offensive_zone_time", "neutral_zone_time",
  "dump_in_pct", "controlled_entry_pct"
)

style_metrics <- style_df %>%
  group_by(country) %>%
  summarise(across(all_of(metric_cols), function(x) mean(x, na.rm = TRUE)))

print("Playing Style Metrics by Country:")
print(style_metrics)

# PCA analysis
# Scale every metric column (country is dropped first), then key the rows of
# the resulting matrix by country name.
style_matrix <- scale(select(style_metrics, -country))
rownames(style_matrix) <- style_metrics$country

# Principal components of the scaled metrics
pca_result <- prcomp(style_matrix)

# Visualization
# Panel 1: PCA biplot. Countries are drawn as points; only the variables
# carry text labels (label = "var").
p1 <- fviz_pca_biplot(
  pca_result,
  geom.ind = "point",
  col.ind = "steelblue",
  label = "var",
  habillage = "none",
  title = "Playing Style Similarity (PCA)"
)

# Panel 2: speed vs physicality. Point size follows skill rating, and each
# point is captioned with its country at a fixed text size.
p2 <- ggplot(
  data = style_metrics,
  mapping = aes(
    x = speed_rating,
    y = physicality_index,
    size = skill_rating,
    label = country
  )
) +
  geom_point(color = "steelblue", alpha = 0.6) +
  geom_text(size = 3, hjust = 0.5, vjust = -1) +
  labs(
    title = "Speed vs Physicality",
    x = "Speed Rating",
    y = "Physicality Index"
  ) +
  theme_minimal()

# Zone time distribution
# Long-format zone shares for the first 10 rows of style_metrics: this is
# table order, not a ranking by any metric.
zone_time_data <- style_metrics %>%
  select(country, defensive_zone_time, neutral_zone_time, offensive_zone_time) %>%
  head(10) %>%
  pivot_longer(
    cols = c(defensive_zone_time, neutral_zone_time, offensive_zone_time),
    names_to = "zone",
    values_to = "time"
  )

# Fill colours are keyed by zone column name
zone_colours <- c(
  "defensive_zone_time" = "#d62728",
  "neutral_zone_time" = "#ff7f0e",
  "offensive_zone_time" = "#2ca02c"
)

# Stacked bars: one bar per country, one segment per zone
p3 <- ggplot(zone_time_data, aes(x = country, y = time, fill = zone)) +
  geom_col() +
  scale_fill_manual(
    values = zone_colours,
    labels = c("Defensive", "Neutral", "Offensive")
  ) +
  labs(
    title = "Zone Time Distribution (Top 10 Countries)",
    x = "Country",
    y = "Time %"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Entry style comparison
# Legend text keyed by entry-type column, in display order (dump-ins first).
entry_labels <- c(
  dump_in_pct = "Dump-In %",
  controlled_entry_pct = "Controlled Entry %"
)

# Long-format entry percentages for the first 12 rows of style_metrics
entry_data <- style_metrics %>%
  head(12) %>%
  select(country, dump_in_pct, controlled_entry_pct) %>%
  pivot_longer(cols = -country, names_to = "entry_type", values_to = "percentage") %>%
  # Fix the level order so the dodged bars appear in the same order as the
  # legend instead of alphabetically.
  mutate(entry_type = factor(entry_type, levels = names(entry_labels)))

p4 <- ggplot(entry_data, aes(x = country, y = percentage, fill = entry_type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Entry Style: Dump-In vs Controlled",
       x = "Country", y = "Percentage") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Explicit breaks pin each label to its own key. An unnamed labels vector
  # without breaks is matched positionally against the alphabetically ordered
  # fill values, which labelled controlled entries "Dump-In %" and vice versa.
  scale_fill_manual(values = c("dump_in_pct" = "coral",
                                "controlled_entry_pct" = "steelblue"),
                    breaks = names(entry_labels),
                    labels = unname(entry_labels))

# Combine plots
# grid.arrange() lays the four panels out in two columns, draws them on the
# active graphics device, and returns the arranged object for saving.
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# Make sure the target directory exists before saving; ggsave() can fail
# when it is missing, discarding the finished figure.
output_path <- "outputs/international_style_differences_r.png"
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)

ggsave(output_path, combined_plot,
       width = 14, height = 10, dpi = 300)

# Style classification
# Countries strictly above the median physicality index are "Physical"; the
# rest are "Skill-Based".
# na.rm = TRUE matters because the per-country means are computed with
# na.rm = TRUE and come out NaN for a country with no physicality values.
# Without it, one such country turns the median, and so every label, into NA.
# That country's own label remains NA.
style_metrics <- style_metrics %>%
  mutate(style_type = ifelse(
    physicality_index > median(physicality_index, na.rm = TRUE),
    "Physical", "Skill-Based"
  ))

print("Style Classification:")
print(table(style_metrics$style_type))

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.