Tracking Prospects Overseas

Beginner | 10 min read | 0 views | Nov 27, 2025

Python Code

import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load prospect tracking data
prospects_df = pd.read_csv("data/international_prospects.csv")

# Age in completed years as of today. A bare year difference overstates the
# age of anyone whose birthday is still ahead in the current calendar year,
# so subtract one for those rows.
birth_dates = pd.to_datetime(prospects_df["birth_date"])
today = datetime.now()
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
prospects_df["age"] = today.year - birth_dates.dt.year - birthday_pending.astype(int)

# Per-game production rates. Zero games played is turned into NaN before
# dividing, so those rows yield NaN instead of inf (inf would sort above
# every real scoring rate).
games_played = prospects_df["games_played"].replace(0, np.nan)
prospects_df["points_per_game"] = prospects_df["points"] / games_played
prospects_df["goals_per_game"] = prospects_df["goals"] / games_played

# Rank every prospect by scoring rate (highest first) and show the top 20
ranking_columns = [
    "player_name",
    "league",
    "age",
    "position",
    "points_per_game",
    "goals_per_game",
]
prospect_rankings = prospects_df.sort_values(by="points_per_game", ascending=False)
top_twenty = prospect_rankings[ranking_columns].head(20)
print("Top 20 International Prospects:")
print(top_twenty)

# Visualization: 2x2 grid of production and age breakdowns
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_league, ax_age), (ax_position, ax_scatter) = axes

# Points per game by league
sns.boxplot(data=prospects_df, x="league", y="points_per_game", ax=ax_league)
ax_league.set_title("Prospect Production by League")
ax_league.set_xlabel("League")
ax_league.set_ylabel("Points Per Game")
ax_league.tick_params(axis="x", rotation=45)

# Age distribution
sns.histplot(data=prospects_df, x="age", bins=15, kde=True, ax=ax_age)
ax_age.set_title("Prospect Age Distribution")
ax_age.set_xlabel("Age")
ax_age.set_ylabel("Count")

# Production by position
sns.boxplot(data=prospects_df, x="position", y="points_per_game", ax=ax_position)
ax_position.set_title("Production by Position")
ax_position.set_xlabel("Position")
ax_position.set_ylabel("Points Per Game")

# Age vs Production scatter (colour = league, marker size = games played)
sns.scatterplot(data=prospects_df, x="age", y="points_per_game",
                hue="league", size="games_played", alpha=0.6, ax=ax_scatter)
ax_scatter.set_title("Age vs Production")
ax_scatter.set_xlabel("Age")
ax_scatter.set_ylabel("Points Per Game")

plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the figure.
output_path = "outputs/international_prospect_tracking.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.show()

# NHL readiness score: scoring rate (x40) plus games played (/10) plus
# years under 25 (x2), limited to the 0-100 range
scoring_term = prospects_df["points_per_game"].mul(40)
games_term = prospects_df["games_played"].div(10)
age_term = prospects_df["age"].rsub(25).mul(2)
raw_readiness = scoring_term + games_term + age_term
prospects_df["nhl_readiness"] = raw_readiness.clip(lower=0, upper=100)

# Fifteen highest readiness scores, trimmed to the identifying columns
readiness_columns = ["player_name", "league", "age", "nhl_readiness"]
top_ready = prospects_df.nlargest(15, "nhl_readiness")[readiness_columns]
print("\nTop NHL-Ready Prospects:")
print(top_ready)

R Code

library(tidyverse)
library(lubridate)
library(ggplot2)
library(gridExtra)

# Load prospect data
prospects_df <- read.csv("data/international_prospects.csv")

# Calculate metrics
prospects_df <- prospects_df %>%
  mutate(
    birth_date = as.Date(birth_date),
    # Age in completed years as of today. A bare year difference overstates
    # the age of anyone whose birthday is still ahead this calendar year.
    age = floor(time_length(interval(birth_date, Sys.Date()), "years")),
    # Rows with zero games played get NA rather than Inf/NaN, so they cannot
    # sort above real scoring rates.
    points_per_game = if_else(
      games_played > 0, points / games_played, NA_real_
    ),
    goals_per_game = if_else(
      games_played > 0, goals / games_played, NA_real_
    )
  )

# Keep the reporting columns, order by scoring rate (highest first) and
# retain the first 20 rows
prospect_rankings <- prospects_df %>%
  select(
    player_name, league, age, position,
    points_per_game, goals_per_game
  ) %>%
  arrange(desc(points_per_game)) %>%
  head(n = 20)

print("Top 20 International Prospects:")
print(prospect_rankings)

# Visualization: four panels, later arranged in a 2x2 grid

# Points per game by league
p1 <- ggplot(prospects_df, aes(x = league, y = points_per_game, fill = league)) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Prospect Production by League",
    x = "League",
    y = "Points Per Game"
  ) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Age distribution
p2 <- ggplot(prospects_df, aes(x = age)) +
  geom_histogram(bins = 15, fill = "steelblue", alpha = 0.7) +
  # after_stat() and linewidth replace the `..count..` notation and the line
  # `size` argument, both deprecated as of ggplot2 3.4.0.
  geom_density(
    aes(y = after_stat(count)),
    color = "darkblue",
    linewidth = 1
  ) +
  theme_minimal() +
  labs(title = "Prospect Age Distribution", x = "Age", y = "Count")

# Production by position
p3 <- ggplot(
  prospects_df,
  aes(x = position, y = points_per_game, fill = position)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Production by Position",
    x = "Position",
    y = "Points Per Game"
  ) +
  theme(legend.position = "none")

# Age vs production (colour = league, point size = games played)
p4 <- ggplot(
  prospects_df,
  aes(x = age, y = points_per_game, color = league, size = games_played)
) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(title = "Age vs Production", x = "Age", y = "Points Per Game")

# Combine plots
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# ggsave() does not create a missing output directory by default, so make
# sure it exists before writing the file.
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "outputs/international_prospect_tracking_r.png",
  plot = combined_plot,
  width = 14,
  height = 10,
  dpi = 300
)

# NHL readiness: scoring rate (x40) plus games played (/10) plus years
# under 25 (x2), clamped to the 0-100 range
prospects_df <- mutate(
  prospects_df,
  nhl_readiness = pmax(
    0,
    pmin(
      100,
      40 * points_per_game + games_played / 10 + 2 * (25 - age)
    )
  )
)

# Fifteen highest readiness scores, trimmed to the identifying columns
top_ready <- prospects_df %>%
  select(player_name, league, age, nhl_readiness) %>%
  arrange(desc(nhl_readiness)) %>%
  head(n = 15)

print("Top NHL-Ready Prospects:")
print(top_ready)

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.