Case Study 1: Analyzing a Complete Team Season

Overview

In this case study, you'll analyze a complete season of traditional statistics for a college football team. You'll calculate team and individual statistics, identify strengths and weaknesses, and generate comprehensive reports.


The Scenario

You're a student analyst for the athletics department. The coaching staff wants a complete statistical breakdown of last season's performance to help with offseason planning. Your task is to:

  1. Calculate all major offensive and defensive statistics
  2. Identify the team's statistical strengths and weaknesses
  3. Compare individual player performances (not covered in the walkthrough below; left to you as Exercise 1)
  4. Generate a season summary report

Part 1: Loading and Preparing the Data

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple

# Simulated season data
# Fix the seed of NumPy's global random state so every run of this case
# study draws the same simulated season.
np.random.seed(42)

def generate_season_data(team: str, games: int = 13) -> Dict:
    """Simulate game-by-game statistics for one team's season.

    All values are drawn from NumPy's global random state, so seed it
    first (``np.random.seed``) when repeatable output is needed.

    Parameters
    ----------
    team : str
        Team name, stored under ``'team'`` in the result.
    games : int, default 13
        Number of games to simulate; weeks are numbered starting at 1.

    Returns
    -------
    dict
        ``{'team': team, 'games': [...]}`` with one stat dict per game.
    """
    randint = np.random.randint
    uniform = np.random.uniform

    def simulate_game(week: int) -> Dict:
        # The random draws below are order-sensitive: reordering them
        # changes the season produced for a given seed.
        won = np.random.random() > 0.35  # ~65% win rate

        # Wins draw from a higher scoring range and a lower points-allowed
        # range than losses do.
        if won:
            scored_range, allowed_range = (21, 48), (14, 28)
        else:
            scored_range, allowed_range = (14, 31), (24, 42)

        stats = {
            'week': week,
            'opponent': f'Opponent {week}',
            'home': week % 2 != 0,
            'result': 'W' if won else 'L',

            # Scoring
            'points_for': randint(*scored_range),
            'points_against': randint(*allowed_range),

            # Passing
            'pass_attempts': randint(28, 45),
            'pass_completions': 0,  # placeholder, filled in below
            'pass_yards': randint(180, 350),
            'pass_tds': randint(1, 4),
            'interceptions': randint(0, 3),
            'sacks_allowed': randint(0, 4),

            # Rushing
            'rush_attempts': randint(28, 42),
            'rush_yards': randint(100, 220),
            'rush_tds': randint(0, 3),

            # Defense
            'opp_pass_yards': randint(150, 320),
            'opp_rush_yards': randint(80, 180),
            'turnovers_forced': randint(0, 3),
            'sacks': randint(1, 5),

            # Situational
            'third_down_att': randint(12, 18),
            'third_down_conv': 0,  # placeholder, filled in below
            'red_zone_att': randint(2, 6),
            'red_zone_td': 0,  # placeholder, filled in below
            'penalties': randint(4, 10),
            'penalty_yards': 0,  # placeholder, filled in below
            'time_of_possession': f"{randint(27, 34)}:{randint(10, 59):02d}"
        }

        # Derived counts: scale each attempt total by a random success
        # rate and truncate to a whole number.
        stats['pass_completions'] = int(stats['pass_attempts'] * uniform(0.58, 0.72))
        stats['third_down_conv'] = int(stats['third_down_att'] * uniform(0.35, 0.55))
        stats['red_zone_td'] = int(stats['red_zone_att'] * uniform(0.50, 0.85))

        # One random per-penalty yardage is applied to every penalty in the game.
        stats['penalty_yards'] = stats['penalties'] * randint(5, 12)

        return stats

    schedule = [simulate_game(week) for week in range(1, games + 1)]
    return {'team': team, 'games': schedule}


# Build the simulated season once; later sections reuse `season`.
season = generate_season_data(team='State University', games=13)

# Quick confirmation of what was generated.
print(
    f"Season data generated for {season['team']}",
    f"Games: {len(season['games'])}",
    sep="\n",
)

Part 2: Calculating Season Statistics

class SeasonStatistics:
    """Calculate comprehensive season statistics.

    Wraps one team's game-by-game records in a DataFrame and exposes one
    summary method per phase of the game. Every summary returns a dict of
    built-in Python numbers (numpy scalars are converted), so the results
    can be formatted, compared, or passed to ``json.dumps`` directly.

    Rate statistics whose denominator is zero (for example a red zone TD
    percentage when there were no red zone trips) are reported as ``0.0``
    instead of ``nan``/``inf``.

    Parameters
    ----------
    season_data : dict
        Needs a ``'team'`` entry and a ``'games'`` entry; ``'games'`` is
        handed to ``pd.DataFrame`` (one row per game). The summary methods
        raise ``KeyError`` if a column they read is missing.
    """

    def __init__(self, season_data: Dict):
        self.team = season_data['team']
        self.games = pd.DataFrame(season_data['games'])

    @staticmethod
    def _rate(numerator, denominator, digits: int, scale: float = 1):
        """Return ``numerator / denominator * scale`` rounded to ``digits``.

        A zero denominator yields ``0.0`` rather than a division warning
        and a ``nan``/``inf`` result.
        """
        if denominator == 0:
            return 0.0
        return round(numerator / denominator * scale, digits)

    @staticmethod
    def _to_builtin(summary: Dict) -> Dict:
        """Return ``summary`` with numpy scalars replaced by Python numbers."""
        return {
            key: value.item() if hasattr(value, 'item') else value
            for key, value in summary.items()
        }

    def record(self) -> Dict:
        """Return wins, losses, and win percentage (0-100, one decimal)."""
        wins = (self.games['result'] == 'W').sum()
        losses = (self.games['result'] == 'L').sum()
        return self._to_builtin({
            'wins': wins,
            'losses': losses,
            'win_pct': self._rate(wins, len(self.games), 1, scale=100)
        })

    def scoring_summary(self) -> Dict:
        """Return points scored/allowed as totals, per-game averages, and margins."""
        points_for = self.games['points_for']
        points_against = self.games['points_against']
        return self._to_builtin({
            'total_points': points_for.sum(),
            'points_per_game': round(points_for.mean(), 1),
            'points_allowed': points_against.sum(),
            'points_allowed_per_game': round(points_against.mean(), 1),
            'point_differential': points_for.sum() - points_against.sum(),
            'avg_margin': round((points_for - points_against).mean(), 1)
        })

    def passing_summary(self) -> Dict:
        """Return passing totals plus completion %, Y/A, Y/G, TD% and INT%.

        ``'sacks'`` here is the number of sacks allowed by the offense.
        """
        totals = {
            'attempts': self.games['pass_attempts'].sum(),
            'completions': self.games['pass_completions'].sum(),
            'yards': self.games['pass_yards'].sum(),
            'touchdowns': self.games['pass_tds'].sum(),
            'interceptions': self.games['interceptions'].sum(),
            'sacks': self.games['sacks_allowed'].sum()
        }
        attempts = totals['attempts']

        rates = {
            'completion_pct': self._rate(totals['completions'], attempts, 1, scale=100),
            'yards_per_attempt': self._rate(totals['yards'], attempts, 2),
            'yards_per_game': self._rate(totals['yards'], len(self.games), 1),
            'td_pct': self._rate(totals['touchdowns'], attempts, 1, scale=100),
            'int_pct': self._rate(totals['interceptions'], attempts, 1, scale=100)
        }

        return self._to_builtin({**totals, **rates})

    def rushing_summary(self) -> Dict:
        """Return rushing totals plus yards per carry and per-game rates."""
        totals = {
            'attempts': self.games['rush_attempts'].sum(),
            'yards': self.games['rush_yards'].sum(),
            'touchdowns': self.games['rush_tds'].sum()
        }
        games_played = len(self.games)

        rates = {
            'yards_per_carry': self._rate(totals['yards'], totals['attempts'], 2),
            'yards_per_game': self._rate(totals['yards'], games_played, 1),
            'attempts_per_game': self._rate(totals['attempts'], games_played, 1)
        }

        return self._to_builtin({**totals, **rates})

    def defensive_summary(self) -> Dict:
        """Return yards allowed (total, per game, by type), takeaways, and sacks."""
        pass_yards = self.games['opp_pass_yards'].sum()
        rush_yards = self.games['opp_rush_yards'].sum()
        total_yards = pass_yards + rush_yards

        return self._to_builtin({
            'total_yards_allowed': total_yards,
            'yards_per_game_allowed': self._rate(total_yards, len(self.games), 1),
            'pass_yards_allowed': pass_yards,
            'rush_yards_allowed': rush_yards,
            'turnovers_forced': self.games['turnovers_forced'].sum(),
            'sacks': self.games['sacks'].sum(),
            'turnovers_per_game': round(self.games['turnovers_forced'].mean(), 2)
        })

    def situational_summary(self) -> Dict:
        """Return third-down, red-zone, and penalty statistics."""
        third_att = self.games['third_down_att'].sum()
        third_conv = self.games['third_down_conv'].sum()

        rz_att = self.games['red_zone_att'].sum()
        rz_td = self.games['red_zone_td'].sum()

        return self._to_builtin({
            'third_down_att': third_att,
            'third_down_conv': third_conv,
            'third_down_pct': self._rate(third_conv, third_att, 1, scale=100),
            'red_zone_att': rz_att,
            'red_zone_td': rz_td,
            'red_zone_td_pct': self._rate(rz_td, rz_att, 1, scale=100),
            'penalties': self.games['penalties'].sum(),
            'penalty_yards': self.games['penalty_yards'].sum(),
            'penalties_per_game': round(self.games['penalties'].mean(), 1)
        })

    def turnover_summary(self) -> Dict:
        """Return turnovers committed and forced, and the resulting margin.

        Turnovers committed are simplified to interceptions thrown, the
        only giveaway column this method reads.
        """
        turnovers_committed = self.games['interceptions'].sum()  # Simplified
        turnovers_forced = self.games['turnovers_forced'].sum()
        margin = turnovers_forced - turnovers_committed

        return self._to_builtin({
            'turnovers_committed': turnovers_committed,
            'turnovers_forced': turnovers_forced,
            'turnover_margin': margin,
            'margin_per_game': self._rate(margin, len(self.games), 2)
        })


# Build the statistics object that every later section reuses.
stats = SeasonStatistics(season)

# Banner
print("\n" + "=" * 60, f"SEASON STATISTICS: {stats.team}", "=" * 60, sep="\n")

# Win-loss record
record = stats.record()
print(f"\nRecord: {record['wins']}-{record['losses']} ({record['win_pct']}%)")

# Headline scoring numbers
scoring = stats.scoring_summary()
print(
    "\nScoring:",
    f"  Points per game: {scoring['points_per_game']}",
    f"  Points allowed per game: {scoring['points_allowed_per_game']}",
    f"  Point differential: {scoring['point_differential']:+d}",
    sep="\n",
)

Part 3: Identifying Strengths and Weaknesses

def analyze_performance(stats: 'SeasonStatistics',
                         benchmarks: Dict) -> Dict:
    """
    Compare team statistics against benchmarks to identify
    strengths and weaknesses.

    Each metric is a strength when it beats its benchmark by more than
    the metric's tolerance, a weakness when it trails by more than the
    tolerance, and is otherwise listed under ``'average'``. For yards
    allowed, lower is better; for every other metric, higher is better.

    Parameters
    ----------
    stats : SeasonStatistics
        Team's season statistics
    benchmarks : dict
        FBS average benchmarks for comparison. Keys read:
        ``completion_pct``, ``yards_per_attempt``, ``yards_per_carry``,
        ``yards_allowed_per_game``, ``third_down_pct``, ``red_zone_td_pct``.

    Returns
    -------
    dict : Analysis with ``'strengths'``, ``'weaknesses'`` and ``'average'``
        lists of human-readable descriptions, in a fixed metric order.
    """
    passing = stats.passing_summary()
    rushing = stats.rushing_summary()
    defense = stats.defensive_summary()
    situational = stats.situational_summary()

    # One row per metric:
    # (team value, benchmark value, tolerance, higher_is_better, description)
    checks = [
        (
            passing['completion_pct'], benchmarks['completion_pct'], 3, True,
            f"Passing accuracy ({passing['completion_pct']}% vs {benchmarks['completion_pct']}% avg)",
        ),
        (
            passing['yards_per_attempt'], benchmarks['yards_per_attempt'], 0.5, True,
            f"Passing efficiency ({passing['yards_per_attempt']} YPA vs {benchmarks['yards_per_attempt']} avg)",
        ),
        (
            rushing['yards_per_carry'], benchmarks['yards_per_carry'], 0.5, True,
            f"Rushing efficiency ({rushing['yards_per_carry']} YPC vs {benchmarks['yards_per_carry']} avg)",
        ),
        (
            defense['yards_per_game_allowed'], benchmarks['yards_allowed_per_game'], 20, False,
            f"Defensive yards allowed ({defense['yards_per_game_allowed']} vs {benchmarks['yards_allowed_per_game']} avg)",
        ),
        (
            situational['third_down_pct'], benchmarks['third_down_pct'], 5, True,
            f"Third down efficiency ({situational['third_down_pct']}% vs {benchmarks['third_down_pct']}% avg)",
        ),
        (
            situational['red_zone_td_pct'], benchmarks['red_zone_td_pct'], 5, True,
            f"Red zone scoring ({situational['red_zone_td_pct']}% TD rate)",
        ),
    ]

    analysis = {
        'strengths': [],
        'weaknesses': [],
        'average': []
    }

    for value, benchmark, tolerance, higher_is_better, description in checks:
        above = value > benchmark + tolerance
        below = value < benchmark - tolerance

        if not (above or below):
            # Within tolerance of the benchmark (a NaN value also lands here,
            # because both comparisons are False).
            bucket = 'average'
        elif above == higher_is_better:
            bucket = 'strengths'
        else:
            bucket = 'weaknesses'

        analysis[bucket].append(description)

    return analysis


# FBS benchmarks (approximate averages) that the team is measured against.
fbs_benchmarks = dict(
    completion_pct=62.0,
    yards_per_attempt=7.5,
    yards_per_carry=4.2,
    yards_allowed_per_game=380,
    third_down_pct=40.0,
    red_zone_td_pct=60.0,
)

analysis = analyze_performance(stats, fbs_benchmarks)

# Banner
print("\n" + "=" * 60, "PERFORMANCE ANALYSIS", "=" * 60, sep="\n")

# One bullet per finding; an empty list prints only its heading.
print("\nStrengths:")
for strength in analysis['strengths']:
    print("  ✓ " + f"{strength}")

print("\nWeaknesses:")
for weakness in analysis['weaknesses']:
    print("  ✗ " + f"{weakness}")

Part 4: Analyzing Season Trends

def analyze_trends(stats: 'SeasonStatistics') -> Dict:
    """Analyze trends throughout the season.

    Splits the schedule into a first and second half (the second half
    gets the extra game when the count is odd) and compares record,
    points per game, and total yards per game. Also reports 3-game
    rolling averages of points and total yards, one value per game.

    Parameters
    ----------
    stats : SeasonStatistics
        Object whose ``games`` DataFrame has ``result``, ``points_for``,
        ``pass_yards`` and ``rush_yards`` columns. It is not modified.

    Returns
    -------
    dict
        ``'first_half'`` / ``'second_half'``: dicts with ``'record'``
        (``"W-L"`` string), ``'ppg'`` and ``'ypg'`` (rounded to 1 decimal).
        ``'improving'``: ``True`` only when second-half PPG is strictly
        higher than first-half PPG.
        ``'rolling_ppg'`` / ``'rolling_ypg'``: lists of 3-game rolling
        means rounded to 1 decimal; early games average over however
        many games have been played so far.
    """
    games = stats.games
    total_yards = games['pass_yards'] + games['rush_yards']

    # Rolling averages (3-game); min_periods=1 gives values for games 1-2 too.
    points_rolling = games['points_for'].rolling(3, min_periods=1).mean()
    yards_rolling = total_yards.rolling(3, min_periods=1).mean()

    def summarize(start, stop) -> Dict:
        """Record, PPG and YPG for the games in rows ``start:stop``."""
        part = games.iloc[start:stop]
        wins = (part['result'] == 'W').sum()
        losses = (part['result'] == 'L').sum()
        return {
            'record': f"{wins}-{losses}",
            'ppg': float(round(part['points_for'].mean(), 1)),
            'ypg': float(round(total_yards.iloc[start:stop].mean(), 1))
        }

    # First half vs second half
    mid = len(games) // 2
    first_ppg = games['points_for'].iloc[:mid].mean()
    second_ppg = games['points_for'].iloc[mid:].mean()

    trends = {
        'first_half': summarize(None, mid),
        'second_half': summarize(mid, None),
        # bool() so callers get a plain Python bool, not a numpy one.
        'improving': bool(second_ppg > first_ppg),
        'rolling_ppg': points_rolling.round(1).tolist(),
        'rolling_ypg': yards_rolling.round(1).tolist()
    }

    return trends


trends = analyze_trends(stats)

# Banner
print("\n" + "=" * 60, "SEASON TRENDS", "=" * 60, sep="\n")

# The same three lines are printed for each half of the season.
for title, half in (("First Half", trends['first_half']),
                    ("Second Half", trends['second_half'])):
    print(
        f"\n{title}: {half['record']}",
        f"  PPG: {half['ppg']}",
        f"  YPG: {half['ypg']}",
        sep="\n",
    )

direction = 'Improving' if trends['improving'] else 'Declining'
print(f"\nTrend: {direction}")

Part 5: Generating the Season Report

def generate_season_report(stats: SeasonStatistics,
                            analysis: Dict,
                            trends: Dict) -> str:
    """Build the full plain-text season report.

    Parameters
    ----------
    stats : SeasonStatistics
        Source of the team name and every statistical summary.
    analysis : dict
        Provides the ``'strengths'`` and ``'weaknesses'`` lists; each
        entry becomes one bullet line.
    trends : dict
        Provides ``'first_half'``, ``'second_half'`` and ``'improving'``.

    Returns
    -------
    str
        The report text, starting and ending with a 70-character rule.
    """
    rec = stats.record()
    pts = stats.scoring_summary()
    air = stats.passing_summary()
    ground = stats.rushing_summary()
    dfn = stats.defensive_summary()
    sit = stats.situational_summary()
    tov = stats.turnover_summary()

    rule = '=' * 70
    total_offense = air['yards'] + ground['yards']

    # Everything up to and including the "Strengths:" heading.
    body = f"""
{rule}
SEASON STATISTICAL REPORT: {stats.team}
{rule}

OVERVIEW
--------
Record: {rec['wins']}-{rec['losses']} ({rec['win_pct']}%)
Points Scored: {pts['total_points']} ({pts['points_per_game']} PPG)
Points Allowed: {pts['points_allowed']} ({pts['points_allowed_per_game']} PPG)
Point Differential: {pts['point_differential']:+d} ({pts['avg_margin']:+.1f} per game)

OFFENSIVE STATISTICS
--------------------
Total Offense: {total_offense} yards
  Passing: {air['yards']} yards ({air['yards_per_game']} YPG)
  Rushing: {ground['yards']} yards ({ground['yards_per_game']} YPG)

Passing:
  Completions/Attempts: {air['completions']}/{air['attempts']}
  Completion %: {air['completion_pct']}%
  Yards/Attempt: {air['yards_per_attempt']}
  Touchdowns: {air['touchdowns']}
  Interceptions: {air['interceptions']}
  TD%: {air['td_pct']}% | INT%: {air['int_pct']}%

Rushing:
  Carries: {ground['attempts']} ({ground['attempts_per_game']} per game)
  Yards/Carry: {ground['yards_per_carry']}
  Touchdowns: {ground['touchdowns']}

DEFENSIVE STATISTICS
--------------------
Total Defense: {dfn['total_yards_allowed']} yards allowed
  Passing: {dfn['pass_yards_allowed']} yards allowed
  Rushing: {dfn['rush_yards_allowed']} yards allowed
  Yards/Game Allowed: {dfn['yards_per_game_allowed']}

Turnovers Forced: {dfn['turnovers_forced']}
Sacks: {dfn['sacks']}

SITUATIONAL STATISTICS
----------------------
Third Down: {sit['third_down_conv']}/{sit['third_down_att']} ({sit['third_down_pct']}%)
Red Zone TD: {sit['red_zone_td']}/{sit['red_zone_att']} ({sit['red_zone_td_pct']}%)
Penalties: {sit['penalties']} for {sit['penalty_yards']} yards ({sit['penalties_per_game']} per game)

TURNOVER MARGIN
---------------
Turnovers Committed: {tov['turnovers_committed']}
Turnovers Forced: {tov['turnovers_forced']}
Margin: {tov['turnover_margin']:+d} ({tov['margin_per_game']:+.2f} per game)

STRENGTHS & WEAKNESSES
----------------------
Strengths:
"""

    # One bullet line per finding; an empty list contributes nothing.
    strength_lines = "".join(f"  • {item}\n" for item in analysis['strengths'])
    weakness_lines = "".join(f"  • {item}\n" for item in analysis['weaknesses'])

    early = trends['first_half']
    late = trends['second_half']
    direction = 'Improving' if trends['improving'] else 'Declining'

    closing = f"""
SEASON TRENDS
-------------
First Half: {early['record']} | {early['ppg']} PPG
Second Half: {late['record']} | {late['ppg']} PPG
Overall Trend: {direction} as season progressed

{rule}
"""

    return body + strength_lines + "\nWeaknesses:\n" + weakness_lines + closing


# Assemble the report from the statistics, benchmark analysis, and trend
# results computed in the earlier parts, then print it.
report = generate_season_report(stats, analysis, trends)
print(report)

Summary

This case study demonstrated how to:

  1. Aggregate game-by-game data into season totals and rates
  2. Calculate comprehensive statistics for all phases of the game
  3. Compare against benchmarks to identify strengths and weaknesses
  4. Analyze trends throughout the season
  5. Generate professional reports for coaching staff

The techniques used here form the foundation for more advanced analysis covered in subsequent chapters.


Exercises

  1. Add individual player statistics to the report
  2. Create visualizations of game-by-game trends
  3. Compare this team to conference opponents
  4. Identify the team's best and worst statistical games