Tracking Prospects Overseas

Beginner 10 min read 16 views Nov 27, 2025

Python Code

import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load prospect tracking data
prospects_df = pd.read_csv("data/international_prospects.csv")

# Age in completed years. Subtracting calendar years alone overstates the age
# of every player whose birthday has not yet occurred this year, so one year
# is taken off for those players.
birth_dates = pd.to_datetime(prospects_df["birth_date"])
today = datetime.now()
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
prospects_df["age"] = (
    today.year - birth_dates.dt.year - birthday_pending.astype(int)
)

# Per-game production. A player with zero games played would otherwise get
# inf (or NaN for 0/0) and sort to the top of the rankings; mapping the
# denominator to NaN keeps those rows out of the leaderboard and the plots.
games_played = prospects_df["games_played"].replace(0, np.nan)
prospects_df["points_per_game"] = prospects_df["points"] / games_played
prospects_df["goals_per_game"] = prospects_df["goals"] / games_played

# Rank every prospect by scoring rate, best first, and show the leaders
ranking_columns = [
    "player_name",
    "league",
    "age",
    "position",
    "points_per_game",
    "goals_per_game",
]
prospect_rankings = prospects_df.sort_values(by="points_per_game", ascending=False)
print("Top 20 International Prospects:")
print(prospect_rankings.loc[:, ranking_columns].head(20))

# Visualization: a 2x2 dashboard of prospect production and age
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: points per game by league
sns.boxplot(data=prospects_df, x="league", y="points_per_game", ax=axes[0, 0])
axes[0, 0].set_title("Prospect Production by League")
axes[0, 0].set_xlabel("League")
axes[0, 0].set_ylabel("Points Per Game")
# League names are rotated so neighbouring tick labels do not collide
axes[0, 0].tick_params(axis="x", rotation=45)

# Top-right: age distribution with a KDE overlay
sns.histplot(data=prospects_df, x="age", bins=15, kde=True, ax=axes[0, 1])
axes[0, 1].set_title("Prospect Age Distribution")
axes[0, 1].set_xlabel("Age")
axes[0, 1].set_ylabel("Count")

# Bottom-left: production by position
sns.boxplot(data=prospects_df, x="position", y="points_per_game", ax=axes[1, 0])
axes[1, 0].set_title("Production by Position")
axes[1, 0].set_xlabel("Position")
axes[1, 0].set_ylabel("Points Per Game")

# Bottom-right: age vs production, coloured by league and sized by games played
sns.scatterplot(data=prospects_df, x="age", y="points_per_game",
                hue="league", size="games_played", alpha=0.6, ax=axes[1, 1])
axes[1, 1].set_title("Age vs Production")
axes[1, 1].set_xlabel("Age")
axes[1, 1].set_ylabel("Points Per Game")

plt.tight_layout()

# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
output_path = "outputs/international_prospect_tracking.png"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
plt.savefig(output_path, dpi=300, bbox_inches="tight")
plt.show()

# NHL readiness score: scoring rate, games played and youth are each weighted,
# summed, and the total is bounded to the 0-100 range.
production_component = prospects_df["points_per_game"] * 40
experience_component = prospects_df["games_played"] / 10
youth_component = (25 - prospects_df["age"]) * 2

readiness_raw = production_component + experience_component + youth_component
prospects_df["nhl_readiness"] = readiness_raw.clip(lower=0, upper=100)

readiness_columns = ["player_name", "league", "age", "nhl_readiness"]
top_ready = prospects_df.nlargest(15, "nhl_readiness")[readiness_columns]
print("\nTop NHL-Ready Prospects:")
print(top_ready)

R Code

library(tidyverse)
library(lubridate)
library(ggplot2)
library(gridExtra)

# Load prospect data
prospects_df <- read.csv("data/international_prospects.csv")

# Calculate metrics
prospects_df <- prospects_df %>%
  mutate(
    birth_date = as.Date(birth_date),
    # Age in completed years. A plain difference of calendar years counts a
    # birthday that has not happened yet this year, so measure the elapsed
    # interval instead and round down.
    age = floor(time_length(interval(birth_date, Sys.Date()), "years")),
    # Zero games played would give Inf (or NaN for 0 / 0) and push the row to
    # the top of the rankings; report NA for those players instead.
    points_per_game = if_else(
      games_played > 0, points / games_played, NA_real_
    ),
    goals_per_game = if_else(
      games_played > 0, goals / games_played, NA_real_
    )
  )

# Top prospects ranking: keep the display columns, order by scoring rate
# (highest first) and retain the first 20 rows
prospect_rankings <- prospects_df %>%
  select(
    player_name, league, age, position,
    points_per_game, goals_per_game
  ) %>%
  arrange(desc(points_per_game)) %>%
  slice_head(n = 20)

print("Top 20 International Prospects:")
print(prospect_rankings)

# Visualization: four panels combined into one 2-column figure

# Panel 1: points per game by league (fill duplicates the x axis, so the
# legend is hidden; league labels are angled to avoid overlap)
p1 <- ggplot(prospects_df, aes(x = league, y = points_per_game, fill = league)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Prospect Production by League", x = "League", y = "Points Per Game") +
  theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1))

# Panel 2: age histogram with a density curve expressed in counts.
# after_stat(count) replaces the deprecated ..count.. notation, and line
# thickness is set with linewidth because size is deprecated for lines.
# NOTE(review): the count-scaled density equals density * n, which lines up
# with the bar heights only when the bin width is close to 1 - confirm the
# overlay looks right for this data.
p2 <- ggplot(prospects_df, aes(x = age)) +
  geom_histogram(bins = 15, fill = "steelblue", alpha = 0.7) +
  geom_density(aes(y = after_stat(count)), color = "darkblue", linewidth = 1) +
  theme_minimal() +
  labs(title = "Prospect Age Distribution", x = "Age", y = "Count")

# Panel 3: points per game by position
p3 <- ggplot(prospects_df, aes(x = position, y = points_per_game, fill = position)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Production by Position", x = "Position", y = "Points Per Game") +
  theme(legend.position = "none")

# Panel 4: age vs production, coloured by league and sized by games played
p4 <- ggplot(prospects_df, aes(x = age, y = points_per_game, color = league, size = games_played)) +
  geom_point(alpha = 0.6) +
  theme_minimal() +
  labs(title = "Age vs Production", x = "Age", y = "Points Per Game")

# Combine plots (grid.arrange draws the layout and returns it for saving)
combined_plot <- grid.arrange(p1, p2, p3, p4, ncol = 2)

# ggsave does not create missing directories, so make sure the folder exists
dir.create("outputs", showWarnings = FALSE, recursive = TRUE)
ggsave("outputs/international_prospect_tracking_r.png", combined_plot, width = 14, height = 10, dpi = 300)

# NHL readiness calculation: weighted sum of scoring rate, games played and
# youth, bounded to the 0-100 range
prospects_df <- mutate(
  prospects_df,
  nhl_readiness = pmin(
    100,
    pmax(0, points_per_game * 40 + games_played / 10 + (25 - age) * 2)
  )
)

# Fifteen highest readiness scores, display columns only
top_ready <- prospects_df %>%
  select(player_name, league, age, nhl_readiness) %>%
  arrange(desc(nhl_readiness)) %>%
  slice_head(n = 15)

print("Top NHL-Ready Prospects:")
print(top_ready)

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.