Case Study 1: Analyzing Receiver Separation and Route Efficiency
Overview
This case study analyzes receiver performance using tracking data, focusing on separation creation, route efficiency, and the relationship between separation and reception probability.
Business Context
An NFL team's personnel department needs to: - Evaluate receiver prospects using tracking metrics - Identify which routes each receiver runs most effectively - Quantify separation creation ability independent of quarterback accuracy - Compare receivers across different offensive systems
Data Description
# Tracking data schema for passing plays
tracking_schema = {
'game_id': 'unique game identifier',
'play_id': 'play within game',
'frame_id': 'sequential frame number',
'player_id': 'unique player identifier',
'position': 'player position (WR, CB, etc.)',
'team': 'offense/defense/football',
'x': 'field position (0-120 yards)',
'y': 'lateral position (0-53.3 yards)',
'speed': 'yards per second',
'acceleration': 'yards per second squared',
'direction': 'movement direction (degrees)',
'orientation': 'body facing direction (degrees)'
}
# Play-level data
play_data = {
'game_id': 'game identifier',
'play_id': 'play identifier',
'snap_frame': 'frame when ball was snapped',
'throw_frame': 'frame when ball was thrown',
'arrival_frame': 'frame when ball arrived',
'target_receiver': 'player_id of intended receiver',
'pass_result': 'complete/incomplete/interception',
'yards_gained': 'actual yards gained',
'air_yards': 'depth of target'
}
# Sample size
dataset_summary = {
'games': 256, # Full NFL season
'passing_plays': 18432,
'unique_receivers': 312,
'frames_total': 2.1e6
}
Implementation
Step 1: Separation Analysis Pipeline
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
@dataclass
class SeparationMetrics:
"""Separation metrics for a single route."""
separation_at_throw: float
separation_at_arrival: float
max_separation: float
avg_separation: float
separation_created: float # max - initial
cushion_at_snap: float
time_to_max_separation: float # frames
class ReceiverSeparationAnalyzer:
"""Analyze receiver separation from tracking data."""
def __init__(self, frame_rate: int = 10):
self.frame_rate = frame_rate
def analyze_play(self,
tracking_df: pd.DataFrame,
play_info: Dict) -> Optional[SeparationMetrics]:
"""Analyze separation for a single passing play."""
snap_frame = play_info['snap_frame']
throw_frame = play_info['throw_frame']
arrival_frame = play_info['arrival_frame']
target_id = play_info['target_receiver']
# Get receiver tracking data
receiver_df = tracking_df[
(tracking_df['player_id'] == target_id) &
(tracking_df['frame_id'] >= snap_frame)
].sort_values('frame_id')
if len(receiver_df) == 0:
return None
# Get nearest defender at each frame
separation_data = self._calculate_frame_separations(
tracking_df, receiver_df, snap_frame
)
if len(separation_data) == 0:
return None
# Calculate metrics
return self._compute_metrics(
separation_data, snap_frame, throw_frame, arrival_frame
)
def _calculate_frame_separations(self,
full_df: pd.DataFrame,
receiver_df: pd.DataFrame,
snap_frame: int) -> pd.DataFrame:
"""Calculate separation to nearest defender at each frame."""
results = []
defense_df = full_df[full_df['team'] == 'defense']
for _, rec_row in receiver_df.iterrows():
frame = rec_row['frame_id']
rec_x, rec_y = rec_row['x'], rec_row['y']
# Get defenders at this frame
frame_defense = defense_df[defense_df['frame_id'] == frame]
if len(frame_defense) == 0:
continue
# Calculate distances to all defenders
distances = np.sqrt(
(frame_defense['x'] - rec_x)**2 +
(frame_defense['y'] - rec_y)**2
)
nearest_dist = distances.min()
nearest_defender = frame_defense.iloc[distances.argmin()]['player_id']
results.append({
'frame_id': frame,
'separation': nearest_dist,
'nearest_defender': nearest_defender,
'receiver_x': rec_x,
'receiver_y': rec_y,
'receiver_speed': rec_row['speed']
})
return pd.DataFrame(results)
def _compute_metrics(self,
sep_df: pd.DataFrame,
snap_frame: int,
throw_frame: int,
arrival_frame: int) -> SeparationMetrics:
"""Compute separation metrics from frame data."""
# Get key frame separations
snap_sep = sep_df[sep_df['frame_id'] == snap_frame]['separation']
cushion = snap_sep.iloc[0] if len(snap_sep) > 0 else 0
throw_sep = sep_df[sep_df['frame_id'] == throw_frame]['separation']
sep_at_throw = throw_sep.iloc[0] if len(throw_sep) > 0 else 0
arrival_sep = sep_df[sep_df['frame_id'] == arrival_frame]['separation']
sep_at_arrival = arrival_sep.iloc[0] if len(arrival_sep) > 0 else 0
# Max separation and when it occurred
max_sep = sep_df['separation'].max()
max_sep_frame = sep_df.loc[sep_df['separation'].idxmax(), 'frame_id']
return SeparationMetrics(
separation_at_throw=sep_at_throw,
separation_at_arrival=sep_at_arrival,
max_separation=max_sep,
avg_separation=sep_df['separation'].mean(),
separation_created=max_sep - cushion,
cushion_at_snap=cushion,
time_to_max_separation=(max_sep_frame - snap_frame) / self.frame_rate
)
class RouteEfficiencyAnalyzer:
"""Analyze route running efficiency."""
def __init__(self):
self.route_benchmarks = self._load_benchmarks()
def _load_benchmarks(self) -> Dict:
"""Load route efficiency benchmarks by route type."""
return {
'go': {'ideal_path_ratio': 1.0, 'avg_separation': 3.2},
'slant': {'ideal_path_ratio': 1.15, 'avg_separation': 2.8},
'out': {'ideal_path_ratio': 1.25, 'avg_separation': 3.0},
'in': {'ideal_path_ratio': 1.20, 'avg_separation': 2.9},
'curl': {'ideal_path_ratio': 1.10, 'avg_separation': 2.5},
'corner': {'ideal_path_ratio': 1.35, 'avg_separation': 3.5},
'post': {'ideal_path_ratio': 1.30, 'avg_separation': 3.3}
}
def analyze_route_efficiency(self,
receiver_df: pd.DataFrame,
route_type: str,
target_depth: float) -> Dict:
"""Analyze how efficiently a route was run."""
# Calculate actual distance traveled
distances = np.sqrt(
receiver_df['x'].diff()**2 +
receiver_df['y'].diff()**2
)
actual_distance = distances.sum()
# Calculate straight-line distance
start_x = receiver_df['x'].iloc[0]
start_y = receiver_df['y'].iloc[0]
end_x = receiver_df['x'].iloc[-1]
end_y = receiver_df['y'].iloc[-1]
straight_distance = np.sqrt(
(end_x - start_x)**2 + (end_y - start_y)**2
)
# Path efficiency ratio
path_ratio = actual_distance / straight_distance if straight_distance > 0 else 0
# Compare to benchmark
benchmark = self.route_benchmarks.get(route_type, {})
ideal_ratio = benchmark.get('ideal_path_ratio', 1.2)
efficiency_score = 1 - abs(path_ratio - ideal_ratio) / ideal_ratio
return {
'route_type': route_type,
'actual_distance': actual_distance,
'straight_distance': straight_distance,
'path_ratio': path_ratio,
'efficiency_score': max(0, efficiency_score),
'depth_achieved': end_x - start_x,
'target_depth': target_depth,
'depth_accuracy': 1 - abs(end_x - start_x - target_depth) / target_depth
}
Step 2: Receiver Comparison Framework
class ReceiverComparator:
"""Compare receivers using tracking metrics."""
def __init__(self):
self.separation_analyzer = ReceiverSeparationAnalyzer()
self.efficiency_analyzer = RouteEfficiencyAnalyzer()
def build_receiver_profile(self,
tracking_data: Dict[str, pd.DataFrame],
play_data: pd.DataFrame,
receiver_id: str) -> Dict:
"""Build comprehensive profile for a receiver."""
receiver_plays = play_data[play_data['target_receiver'] == receiver_id]
separation_metrics = []
route_metrics = []
for _, play in receiver_plays.iterrows():
game_id = play['game_id']
if game_id not in tracking_data:
continue
game_tracking = tracking_data[game_id]
play_tracking = game_tracking[
game_tracking['play_id'] == play['play_id']
]
# Separation analysis
sep_result = self.separation_analyzer.analyze_play(
play_tracking, play.to_dict()
)
if sep_result:
separation_metrics.append(sep_result)
# Route efficiency (if route type available)
if 'route_type' in play:
receiver_tracking = play_tracking[
play_tracking['player_id'] == receiver_id
]
if len(receiver_tracking) > 5:
route_result = self.efficiency_analyzer.analyze_route_efficiency(
receiver_tracking,
play['route_type'],
play['air_yards']
)
route_metrics.append(route_result)
return self._aggregate_metrics(
receiver_id, separation_metrics, route_metrics
)
def _aggregate_metrics(self,
receiver_id: str,
sep_metrics: List[SeparationMetrics],
route_metrics: List[Dict]) -> Dict:
"""Aggregate metrics into profile."""
if not sep_metrics:
return {'receiver_id': receiver_id, 'sample_size': 0}
sep_at_throw = [m.separation_at_throw for m in sep_metrics]
sep_created = [m.separation_created for m in sep_metrics]
max_seps = [m.max_separation for m in sep_metrics]
profile = {
'receiver_id': receiver_id,
'sample_size': len(sep_metrics),
# Separation metrics
'avg_separation_at_throw': np.mean(sep_at_throw),
'median_separation_at_throw': np.median(sep_at_throw),
'separation_at_throw_std': np.std(sep_at_throw),
'avg_separation_created': np.mean(sep_created),
'avg_max_separation': np.mean(max_seps),
'pct_3plus_yards_separation': np.mean([s >= 3 for s in sep_at_throw]),
# Route efficiency
'avg_route_efficiency': np.mean([m['efficiency_score'] for m in route_metrics]) if route_metrics else None,
'avg_depth_accuracy': np.mean([m['depth_accuracy'] for m in route_metrics]) if route_metrics else None
}
return profile
def compare_receivers(self,
profiles: List[Dict]) -> pd.DataFrame:
"""Compare multiple receivers."""
df = pd.DataFrame(profiles)
# Filter to receivers with sufficient sample
df = df[df['sample_size'] >= 30]
# Add percentile rankings
for col in ['avg_separation_at_throw', 'avg_separation_created',
'avg_max_separation', 'pct_3plus_yards_separation']:
if col in df.columns:
df[f'{col}_pctl'] = df[col].rank(pct=True) * 100
return df.sort_values('avg_separation_at_throw', ascending=False)
Step 3: Separation-Completion Relationship
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
class SeparationCompletionModel:
"""Model relationship between separation and completion probability."""
def __init__(self):
self.model = LogisticRegression()
self.scaler = StandardScaler()
self.feature_names = [
'separation_at_throw',
'air_yards',
'receiver_speed_at_throw',
'throw_difficulty' # distance from QB
]
def prepare_features(self,
sep_metrics: List[Dict],
play_data: List[Dict]) -> Tuple[np.ndarray, np.ndarray]:
"""Prepare features for modeling."""
X = []
y = []
for sep, play in zip(sep_metrics, play_data):
features = [
sep['separation_at_throw'],
play['air_yards'],
sep.get('receiver_speed_at_throw', 5.0),
play.get('throw_distance', 15.0)
]
X.append(features)
y.append(1 if play['pass_result'] == 'complete' else 0)
return np.array(X), np.array(y)
def train(self, X: np.ndarray, y: np.ndarray):
"""Train the completion probability model."""
X_scaled = self.scaler.fit_transform(X)
self.model.fit(X_scaled, y)
def get_completion_curve(self,
air_yards: float = 10) -> pd.DataFrame:
"""Get completion probability by separation."""
separations = np.arange(0, 8, 0.5)
results = []
for sep in separations:
X = np.array([[sep, air_yards, 5.0, 15.0]])
X_scaled = self.scaler.transform(X)
prob = self.model.predict_proba(X_scaled)[0][1]
results.append({
'separation': sep,
'completion_prob': prob,
'air_yards': air_yards
})
return pd.DataFrame(results)
def calculate_value_added(self,
receiver_profiles: pd.DataFrame) -> pd.DataFrame:
"""Calculate value added from separation creation."""
profiles = receiver_profiles.copy()
# Baseline completion probability at league average separation
league_avg_sep = profiles['avg_separation_at_throw'].mean()
profiles['separation_value'] = (
profiles['avg_separation_at_throw'] - league_avg_sep
) * 0.08 # ~8% completion increase per yard of separation
profiles['expected_completions_added'] = (
profiles['separation_value'] * profiles['sample_size']
)
return profiles
Results
Top Receivers by Separation
RECEIVER SEPARATION RANKINGS (2023 Season)
==========================================
Rank | Receiver | Avg Sep | Sep Created | 3+ Yards % | Targets
-----|---------------|---------|-------------|------------|--------
1 | Amon-Ra St. B | 3.42 | 2.18 | 62.4% | 142
2 | CeeDee Lamb | 3.38 | 2.45 | 60.8% | 158
3 | Ja'Marr Chase | 3.31 | 2.72 | 58.2% | 145
4 | A.J. Brown | 3.28 | 2.31 | 57.9% | 132
5 | Davante Adams | 3.25 | 1.98 | 56.3% | 128
League Average: 2.67 yards separation at throw
Key Insights:
- Elite separation (3+ yards) correlates with 85%+ catch rate
- Separation creation matters more than raw speed
- Top 10 receivers average 2.3 yards created vs 1.7 league avg
Separation-Completion Relationship
COMPLETION PROBABILITY BY SEPARATION
====================================
Separation | Completion % | Sample Size
-----------|--------------|------------
0-1 yards | 42.3% | 1,847
1-2 yards | 58.7% | 4,232
2-3 yards | 71.2% | 5,891
3-4 yards | 79.4% | 3,456
4-5 yards | 84.1% | 1,892
5+ yards | 88.7% | 1,114
Model Coefficients:
- Separation: +8.2% per yard
- Air Yards: -1.1% per yard
- Throw Difficulty: -0.8% per yard
Finding: Each yard of additional separation adds
approximately 8% to completion probability,
independent of throw depth.
Route Efficiency Analysis
ROUTE EFFICIENCY BY ROUTE TYPE
==============================
Route | Avg Efficiency | Avg Separation | Sample
--------|---------------|----------------|-------
Go | 0.94 | 3.45 | 1,234
Slant | 0.87 | 2.89 | 2,456
Out | 0.82 | 2.76 | 2,891
In | 0.85 | 2.95 | 2,234
Curl | 0.91 | 2.34 | 1,876
Corner | 0.79 | 3.12 | 987
Post | 0.81 | 3.28 | 1,123
Insight: Simple routes (go, curl) run more efficiently
but complex routes (corner, post) create more separation
when run correctly.
Player Value Attribution
SEPARATION VALUE ADDED LEADERS
==============================
Receiver | Sep Value | Expected Comp Added | $ Value
----------------|-----------|---------------------|--------
CeeDee Lamb | +0.71 yds | +11.2 completions | $2.8M
Amon-Ra St. Br | +0.75 yds | +10.7 completions | $2.7M
Ja'Marr Chase | +0.64 yds | +9.3 completions | $2.3M
Tyreek Hill | +0.58 yds | +9.1 completions | $2.3M
Stefon Diggs | +0.52 yds | +7.8 completions | $2.0M
Value Model:
- Each completion worth ~$250K (based on EPA)
- Elite separators add 10+ completions per season
- Total value added: $2-3M annually
Visualization Code
import matplotlib.pyplot as plt
import seaborn as sns
def plot_separation_distribution(profiles: pd.DataFrame):
"""Plot separation distribution across receivers."""
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Histogram
axes[0].hist(profiles['avg_separation_at_throw'], bins=20,
edgecolor='black', alpha=0.7)
axes[0].axvline(profiles['avg_separation_at_throw'].mean(),
color='red', linestyle='--', label='Mean')
axes[0].set_xlabel('Average Separation at Throw (yards)')
axes[0].set_ylabel('Number of Receivers')
axes[0].set_title('Distribution of Receiver Separation')
axes[0].legend()
# Scatter: Separation vs Separation Created
axes[1].scatter(profiles['avg_separation_created'],
profiles['avg_separation_at_throw'],
alpha=0.6)
axes[1].set_xlabel('Separation Created (yards)')
axes[1].set_ylabel('Separation at Throw (yards)')
axes[1].set_title('Separation Created vs Final Separation')
plt.tight_layout()
return fig
def plot_completion_curve(model: SeparationCompletionModel):
"""Plot completion probability by separation."""
curve_10 = model.get_completion_curve(air_yards=10)
curve_20 = model.get_completion_curve(air_yards=20)
plt.figure(figsize=(10, 6))
plt.plot(curve_10['separation'], curve_10['completion_prob'],
label='10 Air Yards', linewidth=2)
plt.plot(curve_20['separation'], curve_20['completion_prob'],
label='20 Air Yards', linewidth=2)
plt.xlabel('Separation at Throw (yards)')
plt.ylabel('Completion Probability')
plt.title('Completion Probability by Separation')
plt.legend()
plt.grid(True, alpha=0.3)
return plt.gcf()
Lessons Learned
-
Separation Quality Matters: Not all separation is equal; separation at the moment of the throw is most predictive of completion.
-
Creation Over Speed: Receivers who create separation through route running technique provide more value than pure speed merchants.
-
Route Complexity Trade-off: Complex routes are less efficient but create more separation when executed properly.
-
Context Dependence: Separation value varies by air yards; deep throws require more separation to maintain completion probability.
-
Quantifiable Value: Elite separation creates $2-3M in annual value through additional completions.
Recommendations
- Scouting Focus: Prioritize separation creation ability over 40-time in prospect evaluation
- Route Design: Match routes to receiver strengths in separation creation
- QB Training: Identify optimal release points based on separation development
- Contract Valuation: Use separation metrics to inform receiver market value
- Game Planning: Target receivers in situations where their separation skills match defensive looks