Case Study 2: Player Performance Profiling for Trade Deadline Analysis
Overview
Scenario: The Sacramento Kings are active buyers at the trade deadline. The front office needs comprehensive player profiles for potential acquisition targets. Your task is to conduct exploratory data analysis on several candidates to identify their strengths, weaknesses, and fit with the Kings' current roster.
Duration: 3-4 hours | Difficulty: Intermediate to Advanced | Prerequisites: Chapter 4 concepts, multi-dimensional analysis skills
Background
The Kings have identified the following needs:
1. A secondary playmaker who can run the offense when De'Aaron Fox rests
2. Defensive versatility on the wing
3. Shooting from the forward positions
The trade market has several interesting candidates. Your EDA will help the front office understand each player's profile beyond traditional statistics.
Trade Candidates:
- Player A: Score-first guard averaging 18 PPG on a rebuilding team
- Player B: Defensive wing averaging 8 PPG with elite steal rates
- Player C: Stretch four averaging 14 PPG while shooting 40% from three
Part 1: Building the Analysis Framework
1.1 Multi-Dimensional Player Profile
"""
Trade Target Player Profiling Analysis
Case Study 2 - Chapter 4: Exploratory Data Analysis
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
# Configure plotting.
# matplotlib ships the seaborn-derived style sheets under a "seaborn-v0_8-"
# prefix in newer releases and under the bare "seaborn-" name in older ones.
# Pick whichever this installation provides instead of failing at import time;
# if neither exists, matplotlib's default style is kept.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")
@dataclass
class PlayerProfile:
    """Container for player analysis results.

    Bundles a player's identity with the raw data frames the analysis code
    reads, so they can be passed around as a single object.
    """
    name: str  # display name; used in chart titles and the report header
    player_id: int  # identifier handed to the data-loading endpoints
    season_stats: pd.DataFrame  # first frame of the general-splits dashboard
    game_logs: pd.DataFrame  # per-game rows; analyses read PTS, AST, TOV, GAME_DATE, ...
    shot_data: pd.DataFrame  # per-shot rows; analyses read LOC_X, LOC_Y, SHOT_ZONE_BASIC, SHOT_MADE_FLAG
    tracking_stats: Optional[pd.DataFrame] = None  # optional; load_player_data does not populate it
def load_player_data(player_id: int, player_name: str,
                     season: str = "2023-24") -> PlayerProfile:
    """
    Fetch one player's game log, shot chart and season splits.

    Args:
        player_id: NBA player identifier
        player_name: Player's full name
        season: Season to analyze

    Returns:
        PlayerProfile holding the three fetched frames; ``tracking_stats``
        keeps its default of None.
    """
    # Function-scope import: nba_api is only required when data is fetched.
    from nba_api.stats.endpoints import (
        playergamelog, shotchartdetail, playerdashboardbygeneralsplits
    )

    def first_frame(endpoint) -> pd.DataFrame:
        """Return the first data frame exposed by an endpoint object."""
        return endpoint.get_data_frames()[0]

    # Requests are issued in a fixed order: game log, shot chart, splits.
    per_game = first_frame(
        playergamelog.PlayerGameLog(player_id=player_id, season=season)
    )

    # team_id=0 together with context_measure_simple='FGA' is passed through
    # to the shot-chart endpoint unchanged.
    attempts = first_frame(
        shotchartdetail.ShotChartDetail(
            player_id=player_id,
            team_id=0,
            season_nullable=season,
            context_measure_simple='FGA',
        )
    )

    dashboard = first_frame(
        playerdashboardbygeneralsplits.PlayerDashboardByGeneralSplits(
            player_id=player_id,
            season=season,
        )
    )

    return PlayerProfile(
        name=player_name,
        player_id=player_id,
        season_stats=dashboard,
        game_logs=per_game,
        shot_data=attempts,
    )
class PlayerProfileAnalyzer:
    """
    Comprehensive player profiling through exploratory data analysis.

    Each ``analyze_*`` method builds a matplotlib figure, stores its metrics
    (plus the figure) in ``self.findings`` under a fixed key and returns that
    entry. ``generate_summary_report`` renders whatever findings have been
    collected so far into a plain-text report.
    """

    # Scoring CV below this value counts as "consistent". A single constant is
    # used for both the report label and the strengths list so they can never
    # disagree; 0.35 matches the reference line drawn by the comparison
    # dashboard.
    CONSISTENT_CV_THRESHOLD = 0.35

    # Horizontal rule used at the top and bottom of the text report.
    _REPORT_RULE = (
        "================================================================================"
    )

    def __init__(self, profile: "PlayerProfile"):
        """
        Initialize analyzer with player data.

        Args:
            profile: PlayerProfile containing all player data
        """
        self.profile = profile
        # Maps analysis name -> dict of metrics produced by that analysis.
        self.findings = {}

    def analyze_scoring_consistency(self) -> Dict:
        """
        Analyze scoring consistency and patterns.

        Reads the ``PTS`` column of the game logs (and ``GAME_DATE`` when
        present, to order games chronologically).

        Returns:
            Dictionary with keys ``ppg``, ``std``, ``cv``, ``median`` and
            ``figure``; also stored as ``findings['scoring_consistency']``.
        """
        # Work on a copy: the helper column added below must not leak into
        # the caller's profile.game_logs frame.
        games = self.profile.game_logs.copy()

        # The rolling average and the trend plot only make sense in
        # chronological order, so do not rely on the row order of the feed.
        if 'GAME_DATE' in games.columns:
            games = (
                games.assign(_GAME_DT=pd.to_datetime(games['GAME_DATE']))
                .sort_values('_GAME_DT', kind='stable')
                .drop(columns='_GAME_DT')
                .reset_index(drop=True)
            )

        # Basic statistics
        ppg = games['PTS'].mean()
        ppg_std = games['PTS'].std()
        # Coefficient of variation; defined as 0 when the mean is not positive.
        ppg_cv = ppg_std / ppg if ppg > 0 else 0
        median_pts = games['PTS'].median()

        # Identify hot and cold streaks
        games['ROLLING_5'] = games['PTS'].rolling(5, min_periods=1).mean()

        # Games above/below average
        above_avg = (games['PTS'] > ppg).sum()
        below_avg = (games['PTS'] < ppg).sum()

        # Create visualization
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Histogram
        axes[0, 0].hist(games['PTS'], bins=15, edgecolor='black', alpha=0.7)
        axes[0, 0].axvline(ppg, color='red', linestyle='--', label=f'Mean: {ppg:.1f}')
        axes[0, 0].axvline(median_pts, color='green', linestyle='--',
                           label=f'Median: {median_pts:.1f}')
        axes[0, 0].set_xlabel('Points')
        axes[0, 0].set_ylabel('Frequency')
        axes[0, 0].set_title(f'{self.profile.name}: Scoring Distribution')
        axes[0, 0].legend()

        # Box plot with individual points (vertical is matplotlib's default
        # orientation, so no orientation argument is passed).
        axes[0, 1].boxplot(games['PTS'])
        # Horizontal jitter keeps overlapping points distinguishable.
        jitter = np.random.normal(0, 0.04, len(games))
        axes[0, 1].scatter(np.ones(len(games)) + jitter, games['PTS'], alpha=0.4, s=20)
        axes[0, 1].set_ylabel('Points')
        axes[0, 1].set_title('Scoring Variability')

        # Time series
        game_numbers = range(len(games))
        axes[1, 0].plot(game_numbers, games['PTS'], 'o-', alpha=0.5, label='Game Points')
        axes[1, 0].plot(game_numbers, games['ROLLING_5'], linewidth=2,
                        label='5-Game Rolling Avg')
        axes[1, 0].axhline(ppg, color='red', linestyle='--', alpha=0.5, label='Season Avg')
        axes[1, 0].set_xlabel('Game Number')
        axes[1, 0].set_ylabel('Points')
        axes[1, 0].set_title('Scoring Trend')
        axes[1, 0].legend()

        # Consistency metrics
        metrics_text = (
            f"Consistency Metrics:\n"
            f"Mean: {ppg:.1f}\n"
            f"Std Dev: {ppg_std:.1f}\n"
            f"CV: {ppg_cv:.2f}\n"
            f"Above Avg: {above_avg} games\n"
            f"Below Avg: {below_avg} games"
        )
        axes[1, 1].text(0.5, 0.5, metrics_text, transform=axes[1, 1].transAxes,
                        fontsize=14, verticalalignment='center',
                        horizontalalignment='center', fontfamily='monospace',
                        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        axes[1, 1].axis('off')
        axes[1, 1].set_title('Summary')

        plt.suptitle(f'{self.profile.name}: Scoring Consistency Analysis', fontsize=14)
        plt.tight_layout()

        self.findings['scoring_consistency'] = {
            'ppg': ppg,
            'std': ppg_std,
            'cv': ppg_cv,
            'median': median_pts,
            'figure': fig
        }
        return self.findings['scoring_consistency']

    def analyze_shot_profile(self) -> Dict:
        """
        Analyze shot selection and efficiency by zone.

        Reads ``LOC_X``, ``LOC_Y``, ``SHOT_ZONE_BASIC`` and ``SHOT_MADE_FLAG``
        from the shot data.

        Returns:
            Dictionary with keys ``zone_stats`` (per-zone attempts, makes,
            FG%, frequency and points per shot), ``total_shots``,
            ``overall_fg`` and ``figure``; also stored as
            ``findings['shot_profile']``.
        """
        shots = self.profile.shot_data.copy()

        # Calculate zone statistics (coordinates are divided by 10 to plot
        # them in feet).
        shots['X_FEET'] = shots['LOC_X'] / 10.0
        shots['Y_FEET'] = shots['LOC_Y'] / 10.0
        zone_stats = shots.groupby('SHOT_ZONE_BASIC').agg({
            'SHOT_MADE_FLAG': ['count', 'sum', 'mean']
        }).reset_index()
        zone_stats.columns = ['ZONE', 'ATTEMPTS', 'MAKES', 'FG_PCT']
        zone_stats['FREQUENCY'] = zone_stats['ATTEMPTS'] / zone_stats['ATTEMPTS'].sum() * 100

        # Calculate points per shot by zone. Zones whose label mentions
        # "Three"/"3" are worth three points, and so is any shot taken from
        # the backcourt (assumes such zones are labelled "Backcourt" --
        # adjust if your data uses another label). Everything else is worth two.
        is_three = zone_stats['ZONE'].astype(str).str.contains(
            r'Three|3|Backcourt', regex=True
        )
        zone_stats['PTS_PER_SHOT'] = zone_stats['FG_PCT'] * np.where(is_three, 3, 2)

        # Create visualization
        fig, axes = plt.subplots(1, 3, figsize=(16, 5))

        # Shot distribution, bars coloured by FG% (red = low, green = high)
        colors = plt.cm.RdYlGn(zone_stats['FG_PCT'])
        axes[0].barh(zone_stats['ZONE'], zone_stats['FREQUENCY'], color=colors)
        axes[0].set_xlabel('Percentage of Shots')
        axes[0].set_title('Shot Distribution by Zone')

        # Add FG% labels
        for i, (freq, pct) in enumerate(zip(zone_stats['FREQUENCY'], zone_stats['FG_PCT'])):
            axes[0].text(freq + 0.5, i, f'{pct*100:.0f}%', va='center', fontsize=10)

        # Shot chart
        made = shots[shots['SHOT_MADE_FLAG'] == 1]
        missed = shots[shots['SHOT_MADE_FLAG'] == 0]
        axes[1].scatter(missed['X_FEET'], missed['Y_FEET'], c='red', marker='x',
                        alpha=0.4, s=20, label='Miss')
        axes[1].scatter(made['X_FEET'], made['Y_FEET'], c='green', marker='o',
                        alpha=0.4, s=20, label='Make')
        axes[1].set_xlim(-25, 25)
        axes[1].set_ylim(-5, 35)
        axes[1].set_aspect('equal')
        axes[1].legend()
        axes[1].set_title('Shot Chart')

        # Points per shot by zone
        sorted_zones = zone_stats.sort_values('PTS_PER_SHOT', ascending=True)
        colors = ['green' if x > 1.0 else 'red' for x in sorted_zones['PTS_PER_SHOT']]
        axes[2].barh(sorted_zones['ZONE'], sorted_zones['PTS_PER_SHOT'],
                     color=colors, alpha=0.7)
        axes[2].axvline(1.0, color='black', linestyle='--', label='League Avg ~1.0')
        axes[2].set_xlabel('Points Per Shot')
        axes[2].set_title('Efficiency by Zone')
        axes[2].legend()

        plt.suptitle(f'{self.profile.name}: Shot Profile Analysis', fontsize=14)
        plt.tight_layout()

        self.findings['shot_profile'] = {
            'zone_stats': zone_stats,
            'total_shots': len(shots),
            'overall_fg': shots['SHOT_MADE_FLAG'].mean(),
            'figure': fig
        }
        return self.findings['shot_profile']

    def analyze_playmaking(self) -> Dict:
        """
        Analyze playmaking ability and patterns.

        Reads ``AST`` and ``PTS`` from the game logs, plus ``TOV`` when that
        column exists.

        Returns:
            Dictionary with keys ``apg``, ``topg``, ``ast_to`` and ``figure``;
            also stored as ``findings['playmaking']``. ``topg`` is 0 and
            ``ast_to`` is ``inf`` when no turnovers are available or recorded.
        """
        games = self.profile.game_logs

        # Basic playmaking metrics
        apg = games['AST'].mean()
        topg = games['TOV'].mean() if 'TOV' in games.columns else 0
        ast_to = apg / topg if topg > 0 else float('inf')

        # Assist distribution
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))

        # Assists histogram: one bin per assist value, at least one bin
        axes[0].hist(games['AST'], bins=max(1, int(games['AST'].max())),
                     edgecolor='black', alpha=0.7, color='steelblue')
        axes[0].axvline(apg, color='red', linestyle='--', label=f'Mean: {apg:.1f}')
        axes[0].set_xlabel('Assists')
        axes[0].set_ylabel('Frequency')
        axes[0].set_title('Assist Distribution')
        axes[0].legend()

        # Assist vs Points
        axes[1].scatter(games['AST'], games['PTS'], alpha=0.6, s=50)
        axes[1].set_xlabel('Assists')
        axes[1].set_ylabel('Points')
        axes[1].set_title('Assists vs Points')

        # Calculate correlation
        corr = games['AST'].corr(games['PTS'])
        axes[1].annotate(f'r = {corr:.2f}', xy=(0.05, 0.95),
                         xycoords='axes fraction', fontsize=12)

        # Assist-to-turnover
        if 'TOV' in games.columns:
            axes[2].scatter(games['TOV'], games['AST'], alpha=0.6, s=50)
            axes[2].set_xlabel('Turnovers')
            axes[2].set_ylabel('Assists')
            axes[2].set_title(f'Playmaking Efficiency (AST/TO: {ast_to:.2f})')
        else:
            axes[2].text(0.5, 0.5, 'Turnover data not available',
                         transform=axes[2].transAxes, ha='center', va='center')
            axes[2].axis('off')

        plt.suptitle(f'{self.profile.name}: Playmaking Analysis', fontsize=14)
        plt.tight_layout()

        self.findings['playmaking'] = {
            'apg': apg,
            'topg': topg,
            'ast_to': ast_to,
            'figure': fig
        }
        return self.findings['playmaking']

    def analyze_clutch_performance(self) -> Dict:
        """
        Analyze scoring performance patterns across the season.

        Despite its name (kept so existing callers keep working), this method
        does not use clutch or close-game data. It splits games into
        high-scoring (more than 120% of the player's average) and low-scoring
        (less than 80%) groups and summarises scoring by month. A real clutch
        analysis would need dedicated clutch splits.

        Reads ``PTS`` and ``GAME_DATE`` from the game logs.

        Returns:
            Dictionary with keys ``high_perf_games``, ``low_perf_games``,
            ``monthly_variance`` and ``figure``; stored as
            ``findings['performance_patterns']``.
        """
        games = self.profile.game_logs.copy()

        # Games well above / well below the player's own scoring average
        ppg = games['PTS'].mean()
        high_performance = games[games['PTS'] > ppg * 1.2]
        low_performance = games[games['PTS'] < ppg * 0.8]

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))

        # Performance distribution comparison
        axes[0].hist(high_performance['PTS'], bins=10, alpha=0.6,
                     label=f'High Perf Games (n={len(high_performance)})', color='green')
        axes[0].hist(low_performance['PTS'], bins=10, alpha=0.6,
                     label=f'Low Perf Games (n={len(low_performance)})', color='red')
        axes[0].set_xlabel('Points')
        axes[0].set_ylabel('Frequency')
        axes[0].set_title('Performance Distribution')
        axes[0].legend()

        # Monthly performance. Grouping by year-month period (rather than the
        # bare month number) keeps the bars in chronological order for a
        # season that spans two calendar years.
        games['GAME_DATE'] = pd.to_datetime(games['GAME_DATE'])
        games['MONTH'] = games['GAME_DATE'].dt.to_period('M')
        monthly = games.groupby('MONTH')['PTS'].agg(['mean', 'std', 'count'])
        monthly = monthly[monthly['count'] >= 3]  # Filter months with few games

        positions = np.arange(len(monthly))
        axes[1].bar(positions, monthly['mean'],
                    yerr=monthly['std'], capsize=5, alpha=0.7)
        axes[1].set_xticks(positions)
        axes[1].set_xticklabels(list(monthly.index.strftime('%b')))
        axes[1].set_xlabel('Month')
        axes[1].set_ylabel('Average Points')
        axes[1].set_title('Scoring by Month')
        axes[1].axhline(ppg, color='red', linestyle='--', alpha=0.5)

        plt.suptitle(f'{self.profile.name}: Performance Patterns', fontsize=14)
        plt.tight_layout()

        self.findings['performance_patterns'] = {
            'high_perf_games': len(high_performance),
            'low_perf_games': len(low_performance),
            'monthly_variance': monthly['mean'].std() if len(monthly) > 1 else 0,
            'figure': fig
        }
        return self.findings['performance_patterns']

    def generate_summary_report(self) -> str:
        """
        Generate comprehensive player summary report.

        Only analyses that have already been run (i.e. have an entry in
        ``self.findings``) contribute content; section headings are always
        emitted.

        Returns:
            Formatted report string
        """
        rule = self._REPORT_RULE
        cv_limit = self.CONSISTENT_CV_THRESHOLD
        scoring = self.findings.get('scoring_consistency')
        shot_profile = self.findings.get('shot_profile')
        playmaking = self.findings.get('playmaking')

        parts = [
            f"\n{rule}\n"
            f"PLAYER PROFILE REPORT: {self.profile.name}\n"
            f"{rule}\n"
            "SCORING PROFILE\n"
            "---------------\n"
        ]
        if scoring is not None:
            # Same threshold as the "Consistent scorer" strength below.
            label = 'Consistent' if scoring['cv'] < cv_limit else 'Variable'
            parts.append(
                f"Average: {scoring['ppg']:.1f} PPG\n"
                f"Consistency (CV): {scoring['cv']:.2f} ({label})\n"
            )

        parts.append(
            "\nSHOT SELECTION\n"
            "--------------\n"
        )
        if shot_profile is not None:
            parts.append(
                f"Total Shots: {shot_profile['total_shots']}\n"
                f"Overall FG%: {shot_profile['overall_fg']*100:.1f}%\n"
            )
            parts.append("\nTop Zones by Volume:\n")
            top_zones = shot_profile['zone_stats'].nlargest(3, 'FREQUENCY')
            for _, row in top_zones.iterrows():
                parts.append(
                    f" {row['ZONE']}: {row['FREQUENCY']:.1f}% of shots, "
                    f"{row['FG_PCT']*100:.0f}% FG\n"
                )

        parts.append(
            "\nPLAYMAKING\n"
            "----------\n"
        )
        if playmaking is not None:
            parts.append(
                f"Assists: {playmaking['apg']:.1f} APG\n"
                f"Turnovers: {playmaking['topg']:.1f} TOPG\n"
                f"AST/TO Ratio: {playmaking['ast_to']:.2f}\n"
            )

        parts.append(
            "\nFIT ASSESSMENT\n"
            "--------------\n"
        )

        # Provide fit assessment based on findings
        strengths = []
        concerns = []
        if scoring is not None:
            if scoring['cv'] < cv_limit:
                strengths.append("Consistent scorer")
            if scoring['ppg'] > 15:
                strengths.append("Proven scoring ability")
        if playmaking is not None:
            if playmaking['apg'] > 5:
                strengths.append("Strong playmaking")
            if playmaking['ast_to'] < 1.5:
                concerns.append("Turnover-prone")

        parts.append("Strengths:\n")
        parts.extend(f" + {s}\n" for s in strengths)
        parts.append("\nConcerns:\n")
        parts.extend(f" - {c}\n" for c in concerns)
        parts.append(f"\n{rule}\n")

        return "".join(parts)
Part 2: Comparative Analysis
2.1 Multi-Player Comparison Dashboard
def _shooting_pct(games: pd.DataFrame, made_col: str, att_col: str, pct_col: str) -> float:
    """Return makes / attempts over all games, else the mean per-game percentage, else 0."""
    if made_col in games and att_col in games:
        attempts = games[att_col].sum()
        # Totals weight every shot equally; averaging per-game percentages
        # would give a 1-for-1 night the same weight as a 10-for-20 night.
        return games[made_col].sum() / attempts if attempts > 0 else 0
    return games[pct_col].mean() if pct_col in games else 0


def create_comparison_dashboard(profiles: List[PlayerProfile],
                                figsize: Tuple = (16, 12)) -> plt.Figure:
    """
    Create comparison dashboard for multiple trade targets.

    The 3x2 grid shows: scoring box plots, normalized key stats, assists vs
    turnovers, scoring consistency (CV), shot volume vs efficiency, and a
    summary table. Shooting percentages are computed from made/attempted
    totals when those columns exist, otherwise from the mean of the per-game
    percentage column.

    Args:
        profiles: List of PlayerProfile objects (at least one)
        figsize: Figure dimensions

    Returns:
        Matplotlib figure with comparison visualizations

    Raises:
        ValueError: If ``profiles`` is empty.
    """
    if not profiles:
        raise ValueError("profiles must contain at least one player")

    fig, axes = plt.subplots(3, 2, figsize=figsize)
    names = [p.name for p in profiles]
    colors = plt.cm.Set2(np.linspace(0, 1, len(profiles)))

    # 1. Scoring comparison (box plots). Tick labels are set on the axis
    # afterwards instead of through boxplot's `labels` keyword, which newer
    # matplotlib releases deprecate.
    scoring_data = [p.game_logs['PTS'] for p in profiles]
    bp = axes[0, 0].boxplot(scoring_data, patch_artist=True)
    axes[0, 0].set_xticklabels(names)
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    axes[0, 0].set_ylabel('Points')
    axes[0, 0].set_title('Scoring Distribution Comparison')

    # 2. Key stats as a grouped bar chart (stand-in for a radar chart)
    categories = ['PPG', 'FG%', '3P%', 'FT%', 'APG']
    x = np.arange(len(categories))
    width = 0.8 / len(profiles)
    for i, (profile, color) in enumerate(zip(profiles, colors)):
        games = profile.game_logs
        values = [
            games['PTS'].mean() / 30,  # Normalize to 0-1ish
            _shooting_pct(games, 'FGM', 'FGA', 'FG_PCT'),
            _shooting_pct(games, 'FG3M', 'FG3A', 'FG3_PCT'),
            _shooting_pct(games, 'FTM', 'FTA', 'FT_PCT'),
            games['AST'].mean() / 10  # Normalize
        ]
        axes[0, 1].bar(x + i * width, values, width, label=profile.name, color=color)
    axes[0, 1].set_xticks(x + width * (len(profiles) - 1) / 2)
    axes[0, 1].set_xticklabels(categories)
    axes[0, 1].set_ylabel('Normalized Value')
    axes[0, 1].set_title('Key Stats Comparison')
    axes[0, 1].legend()

    # 3. Assist-Turnover scatter
    for profile, color in zip(profiles, colors):
        games = profile.game_logs
        if 'TOV' in games.columns:
            axes[1, 0].scatter(games['TOV'].mean(), games['AST'].mean(),
                               s=200, c=[color], label=profile.name, edgecolors='black')
    axes[1, 0].set_xlabel('Turnovers Per Game')
    axes[1, 0].set_ylabel('Assists Per Game')
    axes[1, 0].set_title('Playmaking Efficiency')
    # Add reference lines BEFORE building the legend so their labels show up.
    axes[1, 0].axline((0, 0), slope=2, color='green', linestyle='--', alpha=0.3,
                      label='2:1 AST/TO')
    axes[1, 0].axline((0, 0), slope=1, color='red', linestyle='--', alpha=0.3,
                      label='1:1 AST/TO')
    axes[1, 0].legend()

    # 4. Consistency comparison (CV = std / mean of points; 0 if mean <= 0)
    cvs = []
    for profile in profiles:
        ppg = profile.game_logs['PTS'].mean()
        std = profile.game_logs['PTS'].std()
        cvs.append(std / ppg if ppg > 0 else 0)
    axes[1, 1].barh(names, cvs, color=colors)
    axes[1, 1].set_xlabel('Coefficient of Variation (lower = more consistent)')
    axes[1, 1].set_title('Scoring Consistency')
    axes[1, 1].axvline(0.35, color='green', linestyle='--', alpha=0.5)

    # 5. Volume vs Efficiency
    for profile, color in zip(profiles, colors):
        games = profile.game_logs
        fga = games['FGA'].mean() if 'FGA' in games else 0
        fg_pct = _shooting_pct(games, 'FGM', 'FGA', 'FG_PCT')
        axes[2, 0].scatter(fga, fg_pct * 100, s=200, c=[color],
                           label=profile.name, edgecolors='black')
    axes[2, 0].set_xlabel('Field Goal Attempts Per Game')
    axes[2, 0].set_ylabel('Field Goal Percentage')
    axes[2, 0].set_title('Volume vs Efficiency')
    axes[2, 0].legend()

    # 6. Summary metrics table
    columns = ['Player', 'PPG', 'APG', 'RPG', 'FG%']
    summary_data = []
    for profile in profiles:
        games = profile.game_logs
        has_fg = 'FG_PCT' in games or ('FGM' in games and 'FGA' in games)
        summary_data.append({
            'Player': profile.name,
            'PPG': f"{games['PTS'].mean():.1f}",
            'APG': f"{games['AST'].mean():.1f}",
            'RPG': f"{games['REB'].mean():.1f}" if 'REB' in games else 'N/A',
            'FG%': (f"{_shooting_pct(games, 'FGM', 'FGA', 'FG_PCT')*100:.1f}%"
                    if has_fg else 'N/A')
        })
    axes[2, 1].axis('off')
    table = axes[2, 1].table(
        cellText=[[d[k] for k in columns] for d in summary_data],
        colLabels=columns,
        loc='center',
        cellLoc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    table.scale(1.2, 1.5)
    axes[2, 1].set_title('Summary Statistics')

    fig.suptitle('Trade Target Comparison Dashboard', fontsize=16, y=1.02)
    fig.tight_layout()
    return fig
Part 3: Discussion Questions
Question 1: Fit Over Talent
Player A has the best individual statistics but plays a position the Kings already have filled. How do you weigh individual talent against team fit in your EDA recommendations?
Question 2: Sample Size
One candidate has played only 25 games due to injury. How do you account for small sample sizes in your analysis and communicate uncertainty to the front office?
Question 3: Context Adjustments
Players on bad teams often have inflated statistics due to higher usage. How would you adjust your analysis to account for team context?
Question 4: Missing Data
Tracking data that would reveal defensive ability is not available for one candidate. How do you handle this gap in your comparative analysis?
Deliverables
- Individual Player Reports: Comprehensive EDA for each trade target
- Comparison Dashboard: Multi-player comparison visualization
- Recommendation Memo: Summary of findings with trade recommendations
- Data Quality Notes: Documentation of any data limitations
- Interactive Notebook: Jupyter notebook allowing front office to explore data
Key Takeaways
- Multi-dimensional analysis reveals player profiles beyond box scores
- Visualization enables comparison across many dimensions simultaneously
- Context matters - team role, opponent quality, and sample size affect interpretation
- EDA informs but doesn't decide - analysis supports but doesn't replace basketball judgment
- Communication is key - findings must be accessible to non-technical stakeholders