Case Study 1: Analyzing Passing Network Evolution
Overview
This case study analyzes how a team's passing network evolves over a season, identifying structural changes that correlate with offensive performance.
Business Context
A college football program's analytics staff needs to:
- Track how the passing game structure changes week-to-week
- Identify when target distribution shifts significantly
- Correlate network structure with offensive success
- Inform game planning and personnel decisions
Data Description
# Schema of the play-by-play passing table: column name -> what it holds.
pbp_schema = {
    "game_id": "unique game identifier",
    "week": "week number (1-15)",
    "play_id": "play identifier",
    # Edge endpoints of the passing network.
    "passer": "QB name",
    "receiver": "target name",
    # Per-play outcome columns.
    "complete": "boolean",
    "yards": "yards gained",
    "epa": "expected points added",
    # Route context.
    "route_type": "route run",
    "target_depth": "air yards",
}
# Season-level summary of the team analysed in this case study.
season_data = {
    "team": "Ohio State",
    "season": 2023,
    "games": 13,
    "total_passes": 412,
    "unique_targets": 12,
    "primary_qb": "Kyle McCord",
}
Implementation
Step 1: Build Weekly Networks
import networkx as nx
import pandas as pd
import numpy as np
from typing import Dict, List
class WeeklyPassingNetworkBuilder:
    """Build and analyze passing networks by week.

    Each week's passes become a directed graph whose edges run
    passer -> receiver and carry the aggregated ``targets``,
    ``completions``, ``yards`` and ``epa`` for that pair.

    Attributes:
        weekly_networks: Maps week -> graph built for that week.
        weekly_metrics: One metrics dict per built week; see
            ``_calculate_network_metrics`` for the keys.
    """

    def __init__(self):
        self.weekly_networks: Dict[int, nx.DiGraph] = {}
        self.weekly_metrics: List[Dict] = []

    def build_weekly_networks(self, passes: pd.DataFrame):
        """Build a separate network, and its metrics, for each week.

        Args:
            passes: Play-by-play frame with the columns ``week``, ``passer``,
                ``receiver``, ``play_id``, ``complete``, ``yards`` and ``epa``.

        Calling this again for a week that was already built replaces both
        the stored graph and the stored metrics row for that week, so the
        two attributes never disagree.
        """
        # groupby yields the weeks in sorted order in a single pass and skips
        # rows whose week is missing.
        for week, week_passes in passes.groupby('week'):
            G = self._build_network(week_passes)
            self.weekly_networks[week] = G

            # Drop a stale row for this week (in place, so external
            # references to the list stay valid) before adding the new one.
            self.weekly_metrics[:] = [
                m for m in self.weekly_metrics if m['week'] != week
            ]
            self.weekly_metrics.append(self._calculate_network_metrics(G, week))

    def _build_network(self, passes: pd.DataFrame) -> nx.DiGraph:
        """Build one passer -> receiver network from pass data.

        Args:
            passes: Pass rows to aggregate (typically a single week).

        Returns:
            A directed graph with one edge per (passer, receiver) pair.
            Rows with a missing passer or receiver are dropped by the
            groupby and therefore contribute no edge.
        """
        G = nx.DiGraph()

        # One row per (passer, receiver) pair with its aggregated totals.
        agg = passes.groupby(['passer', 'receiver']).agg({
            'play_id': 'count',
            'complete': 'sum',
            'yards': 'sum',
            'epa': 'sum'
        }).reset_index()

        # Select the columns explicitly so the positional unpacking below
        # does not depend on the column order produced by ``agg``.
        columns = ['passer', 'receiver', 'play_id', 'complete', 'yards', 'epa']
        for passer, receiver, targets, completions, yards, epa in (
                agg[columns].itertuples(index=False, name=None)):
            G.add_edge(
                passer,
                receiver,
                targets=targets,
                completions=completions,
                yards=yards,
                epa=epa
            )
        return G

    def _calculate_network_metrics(self, G: nx.DiGraph, week: int) -> Dict:
        """Calculate target-distribution metrics for one week's network.

        Targets are summed per receiver over every passer in the graph, so
        weeks with more than one passer (or where the passer was himself
        targeted) are handled, and an empty graph yields zeroed metrics
        instead of raising.

        Args:
            G: The week's passing network; edges must carry ``targets``.
            week: Week label stored in the result.

        Returns:
            Dict with the keys ``week``, ``total_targets``,
            ``unique_receivers``, ``hhi``, ``max_share``, ``entropy`` and
            ``density`` (in that order).
        """
        targets_by_receiver: Dict = {}
        for _, receiver, data in G.edges(data=True):
            targets_by_receiver[receiver] = (
                targets_by_receiver.get(receiver, 0) + data['targets']
            )

        metrics = {'week': week}
        metrics.update(
            self._distribution_metrics(list(targets_by_receiver.values()))
        )
        metrics['density'] = nx.density(G)
        return metrics

    @staticmethod
    def _distribution_metrics(targets: List[int]) -> Dict:
        """Summarise how concentrated a list of per-receiver targets is.

        Args:
            targets: Target count for each receiver.

        Returns:
            Dict with ``total_targets``, ``unique_receivers``, ``hhi`` (sum
            of squared shares), ``max_share`` and ``entropy`` (Shannon
            entropy of the shares in bits). When there are no targets the
            three share-based values are 0.0.
        """
        total_targets = sum(targets)
        if total_targets <= 0:
            # Shares are undefined without targets; report a neutral row.
            return {
                'total_targets': 0,
                'unique_receivers': len(targets),
                'hhi': 0.0,
                'max_share': 0.0,
                'entropy': 0.0,
            }

        shares = [t / total_targets for t in targets]
        return {
            'total_targets': total_targets,
            'unique_receivers': len(targets),
            'hhi': sum(s ** 2 for s in shares),
            'max_share': max(shares),
            # Negating each term (not the whole sum) avoids returning -0.0
            # when a single receiver holds every target.
            'entropy': sum(-s * np.log2(s) for s in shares if s > 0),
        }

    def get_metrics_dataframe(self) -> pd.DataFrame:
        """Return the collected weekly metrics as a DataFrame (one row per week)."""
        return pd.DataFrame(self.weekly_metrics)
Step 2: Track Target Share Evolution
class TargetShareTracker:
    """Track how target shares change over time.

    Works on a mapping of week -> passing network in which every edge
    carries a ``targets`` attribute. A receiver's share for a week is his
    targets divided by all targets in that week's graph, summed over every
    passer.
    """

    # Slack used when comparing a share change against the threshold, so a
    # change that equals the threshold mathematically (e.g. 0.3 - 0.2 vs
    # 0.10) is not lost to floating-point rounding.
    _TOLERANCE = 1e-9

    def __init__(self, weekly_networks: Dict[int, nx.DiGraph]):
        self.networks = weekly_networks

    def track_player_share(self, player: str) -> pd.DataFrame:
        """Track a player's target share over the season.

        Args:
            player: Receiver name as it appears as an edge target.

        Returns:
            DataFrame with the columns ``week``, ``player``, ``targets`` and
            ``share``, one row per week in ascending week order. Weeks in
            which the player was not targeted (or nobody was) have a share
            of 0. The columns are present even when there are no weeks.
        """
        rows = []
        for week in sorted(self.networks):
            targets_by_receiver = self._receiver_targets(week)
            total_targets = sum(targets_by_receiver.values())
            player_targets = targets_by_receiver.get(player, 0)
            rows.append({
                'week': week,
                'player': player,
                'targets': player_targets,
                'share': player_targets / total_targets if total_targets > 0 else 0.0
            })
        return pd.DataFrame(rows, columns=['week', 'player', 'targets', 'share'])

    def identify_share_shifts(self, threshold: float = 0.10) -> List[Dict]:
        """Identify weeks with significant share changes.

        Each week is compared with the previous week present in the
        mapping. A player missing from one of the two weeks is treated as
        having a share of 0 in that week.

        Args:
            threshold: Minimum absolute change in share (as a fraction,
                0.10 = 10 percentage points) for a shift to be reported.

        Returns:
            List of dicts with ``week``, ``player``, ``prev_share``,
            ``curr_share``, ``change`` and ``direction``, ordered by week
            and then by player name so the output is deterministic.
        """
        weeks = sorted(self.networks)
        # Compute each week's shares once; every week takes part in up to
        # two comparisons.
        shares_by_week = {week: self._get_shares(week) for week in weeks}

        shifts = []
        for prev_week, curr_week in zip(weeks, weeks[1:]):
            prev_shares = shares_by_week[prev_week]
            curr_shares = shares_by_week[curr_week]

            # Sorted: iterating the raw set would make the order of the
            # reported shifts depend on string hashing.
            for player in sorted(set(prev_shares) | set(curr_shares)):
                prev = prev_shares.get(player, 0)
                curr = curr_shares.get(player, 0)
                change = curr - prev
                if abs(change) + self._TOLERANCE >= threshold:
                    shifts.append({
                        'week': curr_week,
                        'player': player,
                        'prev_share': prev,
                        'curr_share': curr,
                        'change': change,
                        'direction': 'increase' if change > 0 else 'decrease'
                    })
        return shifts

    def _receiver_targets(self, week: int) -> Dict[str, int]:
        """Return targets per receiver for ``week``, summed over all passers."""
        totals: Dict[str, int] = {}
        for _, receiver, data in self.networks[week].edges(data=True):
            totals[receiver] = totals.get(receiver, 0) + data['targets']
        return totals

    def _get_shares(self, week: int) -> Dict[str, float]:
        """Get target shares for a week.

        Returns:
            Mapping receiver -> share of the week's targets. If the week
            has no targets at all, every listed receiver gets 0.0 (and the
            mapping is empty when the graph has no edges).
        """
        targets_by_receiver = self._receiver_targets(week)
        total = sum(targets_by_receiver.values())
        if total <= 0:
            return {receiver: 0.0 for receiver in targets_by_receiver}
        return {
            receiver: targets / total
            for receiver, targets in targets_by_receiver.items()
        }
Step 3: Correlate Structure with Performance
class StructurePerformanceAnalyzer:
    """Analyze relationship between network structure and performance.

    Joins per-week network metrics with per-week game results on the
    ``week`` column and compares the two.
    """

    # Columns of the network-metrics frame that are analysed.
    _NETWORK_METRICS = ('hhi', 'max_share', 'entropy', 'unique_receivers')
    # Columns of the game-results frame used as performance measures.
    _PERFORMANCE_METRICS = ('points_scored', 'yards', 'epa_per_play')

    def __init__(self,
                 network_metrics: pd.DataFrame,
                 game_results: pd.DataFrame):
        """Store the two per-week frames.

        Args:
            network_metrics: One row per week with ``week``, ``hhi``,
                ``max_share``, ``entropy`` and ``unique_receivers``.
            game_results: One row per week with ``week``,
                ``points_scored``, ``yards`` and ``epa_per_play``.
        """
        self.metrics = network_metrics
        self.results = game_results

    def _merge_with_results(self, result_columns) -> pd.DataFrame:
        """Join the network metrics with the given result columns on ``week``.

        Only weeks present in both frames are kept (default inner merge).
        """
        return self.metrics.merge(
            self.results[['week', *result_columns]],
            on='week'
        )

    @staticmethod
    def _summarize_structure(games: pd.DataFrame) -> Dict:
        """Average the structure metrics over a set of games.

        Values are plain floats (NaN when ``games`` is empty) so the result
        prints and serialises cleanly.
        """
        return {
            'avg_hhi': float(games['hhi'].mean()),
            'avg_receivers': float(games['unique_receivers'].mean()),
            'avg_max_share': float(games['max_share'].mean())
        }

    def correlate_metrics(self) -> pd.DataFrame:
        """Correlate network metrics with offensive performance.

        Returns:
            DataFrame with the columns ``network_metric``,
            ``performance_metric`` and ``correlation``: one row per
            (network metric, performance metric) pair, holding the
            correlation computed by ``Series.corr`` across the merged weeks.
        """
        merged = self._merge_with_results(self._PERFORMANCE_METRICS)

        correlations = []
        for net_metric in self._NETWORK_METRICS:
            for perf_metric in self._PERFORMANCE_METRICS:
                correlations.append({
                    'network_metric': net_metric,
                    'performance_metric': perf_metric,
                    'correlation': merged[net_metric].corr(merged[perf_metric])
                })
        return pd.DataFrame(correlations)

    def identify_optimal_structure(self) -> Dict:
        """Identify network structure associated with best performance.

        Games are split at the median ``epa_per_play``: games at or above
        the median count as good, games below it as bad.

        Returns:
            Dict with the keys ``good_games`` and ``bad_games``, each
            mapping to ``avg_hhi``, ``avg_receivers`` and ``avg_max_share``
            for that group.
        """
        merged = self._merge_with_results(['epa_per_play'])

        median_epa = merged['epa_per_play'].median()
        is_good = merged['epa_per_play'] >= median_epa
        is_bad = merged['epa_per_play'] < median_epa

        return {
            'good_games': self._summarize_structure(merged[is_good]),
            'bad_games': self._summarize_structure(merged[is_bad])
        }
Results
Network Structure Evolution
WEEKLY NETWORK METRICS - OHIO STATE 2023
========================================
Week | Targets | Receivers | HHI | Max Share | Entropy
-----|---------|-----------|-------|-----------|--------
1 | 28 | 6 | 0.24 | 0.36 | 2.31
2 | 31 | 7 | 0.21 | 0.29 | 2.48
3 | 35 | 8 | 0.19 | 0.26 | 2.65
4 | 29 | 6 | 0.28 | 0.38 | 2.21
5 | 33 | 7 | 0.22 | 0.30 | 2.42
6 | 38 | 8 | 0.18 | 0.25 | 2.71
7 | 30 | 6 | 0.26 | 0.35 | 2.28
8 | 36 | 9 | 0.16 | 0.23 | 2.85
9 | 32 | 7 | 0.23 | 0.31 | 2.39
10 | 34 | 7 | 0.21 | 0.28 | 2.51
11 | 40 | 9 | 0.15 | 0.22 | 2.91
12 | 35 | 8 | 0.17 | 0.24 | 2.78
13 | 31 | 7 | 0.20 | 0.27 | 2.55
Season Trends:
- Target distribution became more spread (HHI decreased)
- More receivers involved as season progressed
- Entropy (distribution evenness) increased
Target Share Evolution
TOP RECEIVER TARGET SHARE EVOLUTION
===================================
Marvin Harrison Jr:
Week 1-4 Avg: 34.2% → Week 9-13 Avg: 23.8%
Trend: Decreasing (defenses adjusting)
Emeka Egbuka:
Week 1-4 Avg: 22.1% → Week 9-13 Avg: 26.4%
Trend: Increasing (becoming more involved)
Julian Fleming:
Week 1-4 Avg: 12.5% → Week 9-13 Avg: 18.2%
Trend: Increasing (emerging target)
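Significant Shifts Detected (changes in target share, in percentage points):
- Week 6: Harrison share dropped 8 points (double coverage increase)
- Week 8: Egbuka share jumped 7 points (scheme adjustment)
- Week 11: Fleming share increased 6 points (injury replacement)
Note: these shifts are smaller than the default threshold=0.10 of identify_share_shifts, so a lower threshold (e.g., 0.05) is needed to flag them.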
Structure-Performance Correlation
CORRELATION ANALYSIS
====================
Network Metric | Points | Yards | EPA/Play
------------------|--------|-------|----------
HHI | -0.42 | -0.38 | -0.51
Max Share | -0.35 | -0.31 | -0.44
Entropy | +0.48 | +0.45 | +0.58
Unique Receivers | +0.52 | +0.49 | +0.61
Key Finding: More distributed passing correlates
with better offensive performance (unique receivers vs. EPA/play: r = +0.61)
OPTIMAL STRUCTURE COMPARISON
============================
| Good Games | Bad Games
------------------|------------|----------
Avg HHI | 0.18 | 0.25
Avg Receivers | 7.8 | 6.2
Avg Max Share | 24% | 33%
Insight: Spread distribution (lower HHI, more
receivers) associated with better performance
Centrality Analysis
RECEIVER CENTRALITY RANKINGS
============================
Player | PageRank | In-Degree | Betweenness
----------------|----------|-----------|------------
M. Harrison Jr | 0.312 | 0.285 | 0.156
E. Egbuka | 0.248 | 0.231 | 0.189
J. Fleming | 0.156 | 0.145 | 0.142
TreVeyon Hend. | 0.118 | 0.128 | 0.098
C. Scott | 0.095 | 0.098 | 0.085
Role Classification:
- Harrison: Primary target (high PageRank)
- Egbuka: Secondary option becoming primary
- Henderson: Versatile outlet (RB with high involvement)
- Fleming/Scott: Complementary receivers
Lessons Learned
- Distribution Matters: More spread target distribution correlated with +0.15 EPA/play improvement
- Adaptation is Key: Teams that successfully spread targets after opponents adjusted performed better late season
- Centrality Predicts Usage: High betweenness receivers often see increased targets when primaries are covered
- Weekly Tracking Essential: Significant structural changes occur that traditional stats miss
- Visualization Aids Communication: Network diagrams effectively communicated changes to coaching staff
Recommendations
Based on this analysis:
- Target Distribution: Aim for HHI below 0.20 in game planning
- Receiver Development: Develop 7+ viable targets for flexibility
- Weekly Monitoring: Track network metrics weekly to identify trends
- Opponent Analysis: Analyze opponent defensive adjustments through network changes
- In-Game Adjustments: Use real-time network analysis to identify underutilized options