Case Study 1: Analyzing a Complete Team Season
Overview
In this case study, you'll analyze a complete season of traditional statistics for a college football team. You'll calculate team-level statistics, identify strengths and weaknesses, and generate a comprehensive report. Individual player statistics are not part of the simulated data set; adding them is left as an exercise at the end.
The Scenario
You're a student analyst for the athletics department. The coaching staff wants a complete statistical breakdown of last season's performance to help with offseason planning. Your task is to:
- Calculate all major offensive and defensive statistics
- Identify the team's statistical strengths and weaknesses
- Compare individual player performances (not covered by the team-level data used here; see the Exercises section)
- Generate a season summary report
Part 1: Loading and Preparing the Data
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
# Simulated season data: fix the seed of NumPy's global random generator
# so every run draws the same sequence of values.
np.random.seed(seed=42)
def generate_season_data(team: str, games: int = 13) -> Dict:
    """Generate a full season of simulated game-by-game statistics.

    Every value is drawn from NumPy's global random generator, so call
    ``np.random.seed`` beforehand for reproducible output.

    Parameters
    ----------
    team : str
        Team name stored under the ``'team'`` key of the result.
    games : int, default 13
        Number of games to simulate; weeks are numbered starting at 1.

    Returns
    -------
    dict
        ``{'team': team, 'games': [...]}`` where ``'games'`` holds one
        dictionary of statistics per simulated week. The final score of
        each game always agrees with its ``'result'`` flag.
    """
    game_data = []
    for week in range(1, games + 1):
        # Generate realistic game stats with some variance
        is_win = np.random.random() > 0.35  # ~65% win rate
        game = {
            'week': week,
            'opponent': f'Opponent {week}',
            'home': week % 2 == 1,
            'result': 'W' if is_win else 'L',
            # Scoring
            'points_for': np.random.randint(21, 48) if is_win else np.random.randint(14, 31),
            'points_against': np.random.randint(14, 28) if is_win else np.random.randint(24, 42),
            # Passing
            'pass_attempts': np.random.randint(28, 45),
            'pass_completions': 0,  # Derived below
            'pass_yards': np.random.randint(180, 350),
            'pass_tds': np.random.randint(1, 4),
            'interceptions': np.random.randint(0, 3),
            'sacks_allowed': np.random.randint(0, 4),
            # Rushing
            'rush_attempts': np.random.randint(28, 42),
            'rush_yards': np.random.randint(100, 220),
            'rush_tds': np.random.randint(0, 3),
            # Defense
            'opp_pass_yards': np.random.randint(150, 320),
            'opp_rush_yards': np.random.randint(80, 180),
            'turnovers_forced': np.random.randint(0, 3),
            'sacks': np.random.randint(1, 5),
            # Situational
            'third_down_att': np.random.randint(12, 18),
            'third_down_conv': 0,  # Derived below
            'red_zone_att': np.random.randint(2, 6),
            'red_zone_td': 0,  # Derived below
            'penalties': np.random.randint(4, 10),
            'penalty_yards': 0,  # Derived below
            'time_of_possession': f"{np.random.randint(27, 34)}:{np.random.randint(10, 59):02d}"
        }

        # The two scores are drawn from overlapping ranges, so a "win" can
        # come out with fewer points than the opponent (and a "loss" with
        # more). Nudge the winning side's score so the result flag and the
        # scoreboard agree; this consumes no extra random draws.
        if is_win:
            game['points_for'] = max(game['points_for'], game['points_against'] + 1)
        else:
            game['points_against'] = max(game['points_against'], game['points_for'] + 1)

        # Derived stats: a random success rate applied to the attempt count,
        # truncated to a whole number of successes.
        comp_pct = np.random.uniform(0.58, 0.72)
        game['pass_completions'] = int(game['pass_attempts'] * comp_pct)
        third_pct = np.random.uniform(0.35, 0.55)
        game['third_down_conv'] = int(game['third_down_att'] * third_pct)
        rz_pct = np.random.uniform(0.50, 0.85)
        game['red_zone_td'] = int(game['red_zone_att'] * rz_pct)
        # One random per-penalty yardage (5-11) is applied to all penalties.
        game['penalty_yards'] = game['penalties'] * np.random.randint(5, 12)

        game_data.append(game)
    return {'team': team, 'games': game_data}
# Simulate the 13-game season analyzed in the rest of the case study
season = generate_season_data(team='State University', games=13)
print("Season data generated for {}".format(season['team']))
print("Games: {}".format(len(season['games'])))
Part 2: Calculating Season Statistics
class SeasonStatistics:
    """Season-long aggregates derived from game-by-game records.

    The per-game dictionaries are loaded into a DataFrame with one row per
    game. Each summary method returns a flat dictionary of season totals
    together with rates rounded for display.
    """

    def __init__(self, season_data: Dict):
        # season_data supplies the team name and a list of per-game dicts.
        self.team = season_data['team']
        self.games = pd.DataFrame(season_data['games'])

    def _season_totals(self, columns: Dict) -> Dict:
        """Sum each game column, keyed by the name it should have in the output."""
        return {label: self.games[column].sum() for label, column in columns.items()}

    def record(self) -> Dict:
        """Return wins, losses, and wins as a percentage of all games played."""
        outcomes = self.games['result']
        wins = (outcomes == 'W').sum()
        losses = (outcomes == 'L').sum()
        win_pct = round(wins / len(self.games) * 100, 1)
        return {'wins': wins, 'losses': losses, 'win_pct': win_pct}

    def scoring_summary(self) -> Dict:
        """Return points scored and allowed: totals, per-game averages, margins."""
        scored = self.games['points_for']
        allowed = self.games['points_against']
        total_scored = scored.sum()
        total_allowed = allowed.sum()
        return {
            'total_points': total_scored,
            'points_per_game': round(scored.mean(), 1),
            'points_allowed': total_allowed,
            'points_allowed_per_game': round(allowed.mean(), 1),
            'point_differential': total_scored - total_allowed,
            'avg_margin': round((scored - allowed).mean(), 1),
        }

    def passing_summary(self) -> Dict:
        """Return passing totals followed by per-attempt and per-game rates."""
        summary = self._season_totals({
            'attempts': 'pass_attempts',
            'completions': 'pass_completions',
            'yards': 'pass_yards',
            'touchdowns': 'pass_tds',
            'interceptions': 'interceptions',
            'sacks': 'sacks_allowed',
        })
        attempts = summary['attempts']
        summary.update(
            completion_pct=round(summary['completions'] / attempts * 100, 1),
            yards_per_attempt=round(summary['yards'] / attempts, 2),
            yards_per_game=round(summary['yards'] / len(self.games), 1),
            td_pct=round(summary['touchdowns'] / attempts * 100, 1),
            int_pct=round(summary['interceptions'] / attempts * 100, 1),
        )
        return summary

    def rushing_summary(self) -> Dict:
        """Return rushing totals followed by per-carry and per-game rates."""
        summary = self._season_totals({
            'attempts': 'rush_attempts',
            'yards': 'rush_yards',
            'touchdowns': 'rush_tds',
        })
        games_played = len(self.games)
        summary.update(
            yards_per_carry=round(summary['yards'] / summary['attempts'], 2),
            yards_per_game=round(summary['yards'] / games_played, 1),
            attempts_per_game=round(summary['attempts'] / games_played, 1),
        )
        return summary

    def defensive_summary(self) -> Dict:
        """Return yards allowed (total, per game, by type), takeaways, and sacks."""
        pass_allowed = self.games['opp_pass_yards'].sum()
        rush_allowed = self.games['opp_rush_yards'].sum()
        total_allowed = pass_allowed + rush_allowed
        takeaways = self.games['turnovers_forced']
        return {
            'total_yards_allowed': total_allowed,
            'yards_per_game_allowed': round(total_allowed / len(self.games), 1),
            'pass_yards_allowed': pass_allowed,
            'rush_yards_allowed': rush_allowed,
            'turnovers_forced': takeaways.sum(),
            'sacks': self.games['sacks'].sum(),
            'turnovers_per_game': round(takeaways.mean(), 2),
        }

    def situational_summary(self) -> Dict:
        """Return third-down, red-zone, and penalty totals with their rates."""
        frame = self.games
        third_att = frame['third_down_att'].sum()
        third_conv = frame['third_down_conv'].sum()
        rz_att = frame['red_zone_att'].sum()
        rz_td = frame['red_zone_td'].sum()
        flags = frame['penalties']
        return {
            'third_down_att': third_att,
            'third_down_conv': third_conv,
            'third_down_pct': round(third_conv / third_att * 100, 1),
            'red_zone_att': rz_att,
            'red_zone_td': rz_td,
            'red_zone_td_pct': round(rz_td / rz_att * 100, 1),
            'penalties': flags.sum(),
            'penalty_yards': frame['penalty_yards'].sum(),
            'penalties_per_game': round(flags.mean(), 1),
        }

    def turnover_summary(self) -> Dict:
        """Return turnovers committed and forced, plus the margin between them."""
        # Simplified: interceptions are the only turnovers counted as committed.
        giveaways = self.games['interceptions'].sum()
        takeaways = self.games['turnovers_forced'].sum()
        margin = takeaways - giveaways
        return {
            'turnovers_committed': giveaways,
            'turnovers_forced': takeaways,
            'turnover_margin': margin,
            'margin_per_game': round(margin / len(self.games), 2),
        }
# Aggregate the season, then print the headline record and scoring numbers
stats = SeasonStatistics(season)
record = stats.record()
scoring = stats.scoring_summary()

print(f"\n{'=' * 60}")
print("SEASON STATISTICS: {}".format(stats.team))
print(f"{'=' * 60}")

print("\nRecord: {wins}-{losses} ({win_pct}%)".format(**record))

print("\nScoring:")
print(" Points per game: {}".format(scoring['points_per_game']))
print(" Points allowed per game: {}".format(scoring['points_allowed_per_game']))
print(" Point differential: {:+d}".format(scoring['point_differential']))
Part 3: Identifying Strengths and Weaknesses
def analyze_performance(stats: "SeasonStatistics",
                        benchmarks: Dict) -> Dict:
    """
    Compare team statistics against benchmarks to identify
    strengths and weaknesses.

    Each metric is placed in exactly one bucket: a strength when it beats
    the benchmark by more than its tolerance band, a weakness when it
    trails the benchmark by more than the band, and average otherwise.

    Parameters
    ----------
    stats : SeasonStatistics
        Team's season statistics; its ``passing_summary``,
        ``rushing_summary``, ``defensive_summary`` and
        ``situational_summary`` methods are called.
    benchmarks : dict
        Benchmark averages for comparison. Must contain
        ``completion_pct``, ``yards_per_attempt``, ``yards_per_carry``,
        ``yards_allowed_per_game``, ``third_down_pct`` and
        ``red_zone_td_pct``.

    Returns
    -------
    dict : Lists of description strings under the keys ``'strengths'``,
        ``'weaknesses'`` and ``'average'``.

    Raises
    ------
    KeyError
        If a required benchmark or summary key is missing.
    """
    passing = stats.passing_summary()
    rushing = stats.rushing_summary()
    defense = stats.defensive_summary()
    situational = stats.situational_summary()

    # One row per metric:
    # (description, team value, benchmark value, tolerance band, higher is better)
    checks = [
        (
            f"Passing accuracy ({passing['completion_pct']}% vs {benchmarks['completion_pct']}% avg)",
            passing['completion_pct'], benchmarks['completion_pct'], 3, True,
        ),
        (
            f"Passing efficiency ({passing['yards_per_attempt']} YPA vs {benchmarks['yards_per_attempt']} avg)",
            passing['yards_per_attempt'], benchmarks['yards_per_attempt'], 0.5, True,
        ),
        (
            f"Rushing efficiency ({rushing['yards_per_carry']} YPC vs {benchmarks['yards_per_carry']} avg)",
            rushing['yards_per_carry'], benchmarks['yards_per_carry'], 0.5, True,
        ),
        (
            # Yards allowed is the only metric where a lower number is better.
            f"Defensive yards allowed ({defense['yards_per_game_allowed']} vs {benchmarks['yards_allowed_per_game']} avg)",
            defense['yards_per_game_allowed'], benchmarks['yards_allowed_per_game'], 20, False,
        ),
        (
            f"Third down efficiency ({situational['third_down_pct']}% vs {benchmarks['third_down_pct']}% avg)",
            situational['third_down_pct'], benchmarks['third_down_pct'], 5, True,
        ),
        (
            f"Red zone scoring ({situational['red_zone_td_pct']}% TD rate)",
            situational['red_zone_td_pct'], benchmarks['red_zone_td_pct'], 5, True,
        ),
    ]

    analysis = {
        'strengths': [],
        'weaknesses': [],
        'average': []
    }
    for description, value, benchmark, band, higher_is_better in checks:
        above = value > benchmark + band
        below = value < benchmark - band
        if not (above or below):
            # Within the tolerance band (or not comparable, e.g. NaN).
            analysis['average'].append(description)
            continue
        favorable = above if higher_is_better else below
        analysis['strengths' if favorable else 'weaknesses'].append(description)
    return analysis
# Approximate FBS averages used as the comparison baseline
fbs_benchmarks = dict(
    completion_pct=62.0,
    yards_per_attempt=7.5,
    yards_per_carry=4.2,
    yards_allowed_per_game=380,
    third_down_pct=40.0,
    red_zone_td_pct=60.0,
)
analysis = analyze_performance(stats, fbs_benchmarks)

print(f"\n{'=' * 60}")
print("PERFORMANCE ANALYSIS")
print(f"{'=' * 60}")

print("\nStrengths:")
for strength in analysis['strengths']:
    print(" ✓ {}".format(strength))

print("\nWeaknesses:")
for weakness in analysis['weaknesses']:
    print(" ✗ {}".format(weakness))
Part 4: Game-by-Game Trends
def analyze_trends(stats: "SeasonStatistics") -> Dict:
    """
    Compare the two halves of the season and report 3-game rolling averages.

    Parameters
    ----------
    stats : SeasonStatistics
        Season statistics whose ``games`` frame has one row per game with
        ``result``, ``points_for``, ``pass_yards`` and ``rush_yards``
        columns. The frame is only read, never modified.

    Returns
    -------
    dict
        ``'first_half'`` and ``'second_half'``: each a dict with
        ``'record'`` (a "wins-losses" string), ``'ppg'`` and ``'ypg'``
        (means rounded to one decimal).
        ``'improving'``: True when second-half points per game exceed
        first-half points per game.
        ``'rolling'``: ``'points'`` and ``'yards'`` lists holding the
        rolling mean over up to the last three games, one entry per game,
        rounded to one decimal.
    """
    games = stats.games
    points = games['points_for']
    total_yards = games['pass_yards'] + games['rush_yards']

    def _summarize(rows: slice) -> Dict:
        """Record and per-game averages for one positional slice of the season."""
        results = games['result'].iloc[rows]
        return {
            'record': f"{(results == 'W').sum()}-{(results == 'L').sum()}",
            'ppg': round(points.iloc[rows].mean(), 1),
            'ypg': round(total_yards.iloc[rows].mean(), 1),
        }

    # Integer division puts the extra game of an odd-length season in the
    # second half.
    mid = len(games) // 2
    first, second = slice(None, mid), slice(mid, None)

    return {
        'first_half': _summarize(first),
        'second_half': _summarize(second),
        'improving': bool(points.iloc[second].mean() > points.iloc[first].mean()),
        # min_periods=1 lets the first two games report an average over the
        # games played so far instead of NaN.
        'rolling': {
            'points': points.rolling(3, min_periods=1).mean().round(1).tolist(),
            'yards': total_yards.rolling(3, min_periods=1).mean().round(1).tolist(),
        },
    }
# Split-season comparison printed as a short console summary
trends = analyze_trends(stats)

print(f"\n{'=' * 60}")
print("SEASON TRENDS")
print(f"{'=' * 60}")

print("\nFirst Half: {record}".format(**trends['first_half']))
print(" PPG: {ppg}".format(**trends['first_half']))
print(" YPG: {ypg}".format(**trends['first_half']))

print("\nSecond Half: {record}".format(**trends['second_half']))
print(" PPG: {ppg}".format(**trends['second_half']))
print(" YPG: {ypg}".format(**trends['second_half']))

print("\nTrend: {}".format('Improving' if trends['improving'] else 'Declining'))
Part 5: Generating the Season Report
def generate_season_report(stats: "SeasonStatistics",
                           analysis: Dict,
                           trends: Dict) -> str:
    """Generate a comprehensive plain-text season report.

    Parameters
    ----------
    stats : SeasonStatistics
        Season statistics; every summary method (record, scoring, passing,
        rushing, defensive, situational, turnover) is called once.
    analysis : dict
        Must provide ``'strengths'`` and ``'weaknesses'`` lists of strings.
    trends : dict
        Must provide ``'first_half'`` and ``'second_half'`` dicts (each
        with ``'record'`` and ``'ppg'``) and a truthy/falsy ``'improving'``.

    Returns
    -------
    str
        The report, starting with a blank line and ending with a newline.
        An empty strengths or weaknesses list is rendered as a single
        "None identified" bullet rather than an empty section.
    """
    record = stats.record()
    scoring = stats.scoring_summary()
    passing = stats.passing_summary()
    rushing = stats.rushing_summary()
    defense = stats.defensive_summary()
    situational = stats.situational_summary()
    turnovers = stats.turnover_summary()

    rule = '=' * 70

    def _bullets(items) -> list:
        """One bullet line per item, or a placeholder when there are none."""
        return [f" • {item}" for item in items] or [" • None identified"]

    # The report is assembled as a list of lines and joined once at the end.
    lines = [
        "",
        rule,
        f"SEASON STATISTICAL REPORT: {stats.team}",
        rule,
        "OVERVIEW",
        "--------",
        f"Record: {record['wins']}-{record['losses']} ({record['win_pct']}%)",
        f"Points Scored: {scoring['total_points']} ({scoring['points_per_game']} PPG)",
        f"Points Allowed: {scoring['points_allowed']} ({scoring['points_allowed_per_game']} PPG)",
        f"Point Differential: {scoring['point_differential']:+d} ({scoring['avg_margin']:+.1f} per game)",
        "OFFENSIVE STATISTICS",
        "--------------------",
        f"Total Offense: {passing['yards'] + rushing['yards']} yards",
        f"Passing: {passing['yards']} yards ({passing['yards_per_game']} YPG)",
        f"Rushing: {rushing['yards']} yards ({rushing['yards_per_game']} YPG)",
        "Passing:",
        f"Completions/Attempts: {passing['completions']}/{passing['attempts']}",
        f"Completion %: {passing['completion_pct']}%",
        f"Yards/Attempt: {passing['yards_per_attempt']}",
        f"Touchdowns: {passing['touchdowns']}",
        f"Interceptions: {passing['interceptions']}",
        f"TD%: {passing['td_pct']}% | INT%: {passing['int_pct']}%",
        "Rushing:",
        f"Carries: {rushing['attempts']} ({rushing['attempts_per_game']} per game)",
        f"Yards/Carry: {rushing['yards_per_carry']}",
        f"Touchdowns: {rushing['touchdowns']}",
        "DEFENSIVE STATISTICS",
        "--------------------",
        f"Total Defense: {defense['total_yards_allowed']} yards allowed",
        f"Passing: {defense['pass_yards_allowed']} yards allowed",
        f"Rushing: {defense['rush_yards_allowed']} yards allowed",
        f"Yards/Game Allowed: {defense['yards_per_game_allowed']}",
        f"Turnovers Forced: {defense['turnovers_forced']}",
        f"Sacks: {defense['sacks']}",
        "SITUATIONAL STATISTICS",
        "----------------------",
        f"Third Down: {situational['third_down_conv']}/{situational['third_down_att']} ({situational['third_down_pct']}%)",
        f"Red Zone TD: {situational['red_zone_td']}/{situational['red_zone_att']} ({situational['red_zone_td_pct']}%)",
        f"Penalties: {situational['penalties']} for {situational['penalty_yards']} yards ({situational['penalties_per_game']} per game)",
        "TURNOVER MARGIN",
        "---------------",
        f"Turnovers Committed: {turnovers['turnovers_committed']}",
        f"Turnovers Forced: {turnovers['turnovers_forced']}",
        f"Margin: {turnovers['turnover_margin']:+d} ({turnovers['margin_per_game']:+.2f} per game)",
        "STRENGTHS & WEAKNESSES",
        "----------------------",
        "Strengths:",
        *_bullets(analysis['strengths']),
        "",
        "Weaknesses:",
        *_bullets(analysis['weaknesses']),
        "",
        "SEASON TRENDS",
        "-------------",
        f"First Half: {trends['first_half']['record']} | {trends['first_half']['ppg']} PPG",
        f"Second Half: {trends['second_half']['record']} | {trends['second_half']['ppg']} PPG",
        f"Overall Trend: {'Improving' if trends['improving'] else 'Declining'} as season progressed",
        rule,
    ]
    return "\n".join(lines) + "\n"
# Render the full text report from the pieces computed above and display it
report = generate_season_report(stats=stats, analysis=analysis, trends=trends)
print(report)
Summary
This case study demonstrated how to:
- Aggregate game-by-game data into season totals and rates
- Calculate comprehensive statistics for all phases of the game
- Compare against benchmarks to identify strengths and weaknesses
- Analyze trends throughout the season
- Generate professional reports for coaching staff
The techniques used here form the foundation for more advanced analysis covered in subsequent chapters.
Exercises
- Add individual player statistics to the report
- Create visualizations of game-by-game trends
- Compare this team to conference opponents
- Identify the team's best and worst statistical games