Case Study 2: Quarterback Comparison Analysis

Overview

In this case study, you'll use traditional statistics to compare multiple quarterbacks and determine which one had the best season. You'll calculate efficiency metrics, rank each quarterback across multiple categories (including team success), and present a comprehensive comparison.


The Scenario

A sports media outlet wants to determine the top quarterback in college football for their end-of-season awards. You have statistics for five top quarterbacks and need to:

  1. Calculate all relevant passing metrics for each QB
  2. Rank them by different criteria
  3. Determine the most complete quarterback
  4. Present a data-driven recommendation

Part 1: The Data

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple

# Quarterback season statistics, keyed by quarterback name.
# Each inner dict holds raw season totals for one player.
qb_data = {
    'QB Alpha': {
        'team': 'University A',
        'games': 14,
        'completions': 298,
        'attempts': 445,
        'yards': 3892,
        'touchdowns': 38,
        'interceptions': 6,
        'sacks': 18,
        'rush_attempts': 85,
        'rush_yards': 312,
        'rush_tds': 5,
        'team_wins': 13,
        'conference': 'Power Five'
    },
    'QB Beta': {
        'team': 'University B',
        'games': 13,
        'completions': 276,
        'attempts': 412,
        'yards': 3654,
        'touchdowns': 32,
        'interceptions': 8,
        'sacks': 22,
        'rush_attempts': 45,
        'rush_yards': 156,
        'rush_tds': 2,
        'team_wins': 11,
        'conference': 'Power Five'
    },
    'QB Gamma': {
        'team': 'University C',
        'games': 14,
        'completions': 312,
        'attempts': 478,
        'yards': 4125,
        'touchdowns': 35,
        'interceptions': 11,
        'sacks': 15,
        'rush_attempts': 98,
        'rush_yards': 445,
        'rush_tds': 8,
        'team_wins': 12,
        'conference': 'Power Five'
    },
    'QB Delta': {
        'team': 'University D',
        'games': 12,
        'completions': 245,
        'attempts': 362,
        'yards': 3256,
        'touchdowns': 28,
        'interceptions': 5,
        'sacks': 12,
        'rush_attempts': 32,
        'rush_yards': 78,
        'rush_tds': 1,
        'team_wins': 10,
        'conference': 'Power Five'
    },
    'QB Epsilon': {
        'team': 'University E',
        'games': 13,
        'completions': 285,
        'attempts': 425,
        'yards': 3789,
        'touchdowns': 33,
        'interceptions': 7,
        'sacks': 20,
        'rush_attempts': 72,
        'rush_yards': 285,
        'rush_tds': 4,
        'team_wins': 11,
        'conference': 'Power Five'
    }
}

# Build one row per quarterback. ``from_dict(orient='index')`` infers a dtype
# per column, so the counting stats stay integers. Transposing
# ``pd.DataFrame(qb_data)`` instead would turn every column into ``object``
# dtype, and pandas would no longer treat the stats as numeric (for example,
# ``describe()`` and ``select_dtypes('number')`` would skip them).
qb_df = (
    pd.DataFrame.from_dict(qb_data, orient='index')
    .reset_index()
    .rename(columns={'index': 'name'})
)
print("Quarterback Raw Statistics:")
print(qb_df[['name', 'team', 'games', 'completions', 'attempts', 'yards', 'touchdowns', 'interceptions']])

Part 2: Calculating Efficiency Metrics

class QBAnalyzer:
    """Analyze and compare quarterback season statistics.

    ``qb_stats`` maps each quarterback's name to a dict of raw season totals.
    The methods read the keys ``games``, ``completions``, ``attempts``,
    ``yards``, ``touchdowns``, ``interceptions``, ``rush_attempts``,
    ``rush_yards``, ``rush_tds`` and ``team_wins``.
    """

    def __init__(self, qb_stats: Dict):
        self.qb_stats = qb_stats

    @staticmethod
    def _safe_div(numerator: float, denominator: float) -> float:
        """Return ``numerator / denominator``, or 0.0 if the denominator is 0."""
        return numerator / denominator if denominator else 0.0

    def calculate_passing_metrics(self, qb_name: str) -> Dict:
        """Calculate all passing efficiency metrics for a QB.

        Args:
            qb_name: Key of the quarterback in ``qb_stats``.

        Returns:
            A dict with the raw passing totals plus rounded rate stats:
            completion %, yards/attempt, TD %, INT %, passer rating,
            per-game yards and touchdowns, and the TD-to-INT ratio.
            A quarterback with zero attempts (or zero games) gets 0.0 for
            the affected rates instead of raising ``ZeroDivisionError``.

        Raises:
            KeyError: If ``qb_name`` or a required stat key is missing.
        """
        stats = self.qb_stats[qb_name]
        attempts = stats['attempts']
        games = stats['games']

        comp_pct = self._safe_div(stats['completions'], attempts) * 100
        ypa = self._safe_div(stats['yards'], attempts)
        td_pct = self._safe_div(stats['touchdowns'], attempts) * 100
        int_pct = self._safe_div(stats['interceptions'], attempts) * 100

        # With no attempts there is nothing to rate. Running the formula on
        # all-zero rates would still award full interception credit (39.6),
        # so report 0.0 instead.
        if attempts:
            passer_rating = self._calculate_passer_rating(
                comp_pct, ypa, td_pct, int_pct
            )
        else:
            passer_rating = 0.0

        # Per game stats
        ypg = self._safe_div(stats['yards'], games)
        tdpg = self._safe_div(stats['touchdowns'], games)

        # TD-to-INT ratio; a zero-interception season divides by 1 so the
        # ratio equals the touchdown count rather than being undefined.
        td_int_ratio = stats['touchdowns'] / max(stats['interceptions'], 1)

        return {
            'name': qb_name,
            'games': games,
            'completions': stats['completions'],
            'attempts': attempts,
            'yards': stats['yards'],
            'touchdowns': stats['touchdowns'],
            'interceptions': stats['interceptions'],
            'completion_pct': round(comp_pct, 1),
            'yards_per_attempt': round(ypa, 2),
            'td_pct': round(td_pct, 1),
            'int_pct': round(int_pct, 1),
            'passer_rating': round(passer_rating, 1),
            'yards_per_game': round(ypg, 1),
            'td_per_game': round(tdpg, 2),
            'td_int_ratio': round(td_int_ratio, 2)
        }

    def _calculate_passer_rating(self, comp_pct: float, ypa: float,
                                   td_pct: float, int_pct: float) -> float:
        """Calculate NFL-style passer rating.

        Each of the four components is clamped to the range [0, 2.375], so
        the result lies between 0.0 and 158.3. Note this is the NFL formula,
        not the NCAA passing-efficiency formula.

        Args:
            comp_pct: Completion percentage (0-100).
            ypa: Yards per attempt.
            td_pct: Touchdowns per attempt, as a percentage.
            int_pct: Interceptions per attempt, as a percentage.
        """
        a = max(0, min(((comp_pct - 30) / 20), 2.375))
        b = max(0, min(((ypa - 3) / 4), 2.375))
        c = max(0, min((td_pct / 5), 2.375))
        d = max(0, min((2.375 - (int_pct / 4)), 2.375))

        return ((a + b + c + d) / 6) * 100

    def calculate_total_offense(self, qb_name: str) -> Dict:
        """Calculate total offensive contribution (passing + rushing).

        Args:
            qb_name: Key of the quarterback in ``qb_stats``.

        Returns:
            A dict with combined yards, touchdowns and plays, plus rounded
            yards per play and per-game yards and touchdowns. Rates are 0.0
            when the quarterback has no plays or no games.

        Raises:
            KeyError: If ``qb_name`` or a required stat key is missing.
        """
        stats = self.qb_stats[qb_name]

        total_yards = stats['yards'] + stats['rush_yards']
        total_tds = stats['touchdowns'] + stats['rush_tds']
        total_plays = stats['attempts'] + stats['rush_attempts']
        games = stats['games']

        return {
            'name': qb_name,
            'total_yards': total_yards,
            'total_tds': total_tds,
            'total_plays': total_plays,
            'yards_per_play': round(self._safe_div(total_yards, total_plays), 2),
            'total_yards_per_game': round(self._safe_div(total_yards, games), 1),
            'total_tds_per_game': round(self._safe_div(total_tds, games), 2)
        }

    def compare_all(self) -> pd.DataFrame:
        """Compare all quarterbacks.

        Returns:
            A DataFrame with one row per quarterback, in ``qb_stats`` order,
            holding the passing metrics, the total-offense metrics and
            ``team_wins``.
        """
        comparisons = []

        for qb_name, stats in self.qb_stats.items():
            combined = {
                **self.calculate_passing_metrics(qb_name),
                **self.calculate_total_offense(qb_name),
                'team_wins': stats['team_wins'],
            }
            comparisons.append(combined)

        return pd.DataFrame(comparisons)


# Wrap the raw season totals in an analyzer and derive every metric at once.
analyzer = QBAnalyzer(qb_stats=qb_data)
comparison_df = analyzer.compare_all()

# Section banner
print(f"\n{'=' * 70}")
print("QUARTERBACK COMPARISON")
print('=' * 70)

# Columns shown in the efficiency overview
display_cols = [
    'name',
    'completion_pct',
    'yards_per_attempt',
    'passer_rating',
    'td_int_ratio',
    'total_yards_per_game',
]
print("\nEfficiency Metrics:")
print(comparison_df.loc[:, display_cols].to_string(index=False))

Part 3: Ranking by Different Criteria

def create_rankings(df: pd.DataFrame) -> pd.DataFrame:
    """Create rankings for each quarterback across multiple categories.

    Every category is ranked so that the highest value gets rank 1. Ties use
    competition ranking (``method='min'``): tied quarterbacks share the best
    rank of their group, e.g. ``1, 2, 2, 2``. The default averaged tie ranks
    are fractional, and truncating them to ``int`` can leave a rank unused
    (``1, 3, 3, 3``).

    Args:
        df: Comparison frame with the columns ``name``, ``yards``,
            ``touchdowns``, ``total_yards``, ``completion_pct``,
            ``yards_per_attempt``, ``passer_rating``, ``td_int_ratio`` and
            ``team_wins``.

    Returns:
        A frame with ``name``, one ``*_rank`` column per category,
        ``composite_rank`` (the mean of the category ranks) and
        ``final_rank``, sorted best-first by ``final_rank``.
    """
    # (output column, source column); for every source stat higher is better.
    rank_sources = [
        # Volume stats
        ('yards_rank', 'yards'),
        ('tds_rank', 'touchdowns'),
        ('total_yards_rank', 'total_yards'),
        # Efficiency stats
        ('comp_pct_rank', 'completion_pct'),
        ('ypa_rank', 'yards_per_attempt'),
        ('rating_rank', 'passer_rating'),
        # TD-INT ratio
        ('td_int_rank', 'td_int_ratio'),
        # Team success
        ('wins_rank', 'team_wins'),
    ]

    rankings = df[['name']].copy()
    for rank_col, source_col in rank_sources:
        rankings[rank_col] = (
            df[source_col].rank(ascending=False, method='min').astype(int)
        )

    # Composite rank is the plain average of all category ranks (lower is
    # better), so it is ranked ascending to obtain the final placement.
    rank_cols = [rank_col for rank_col, _ in rank_sources]
    rankings['composite_rank'] = rankings[rank_cols].mean(axis=1)
    rankings['final_rank'] = (
        rankings['composite_rank'].rank(method='min').astype(int)
    )

    # Stable sort keeps tied quarterbacks in their original input order.
    return rankings.sort_values('final_rank', kind='stable')


# Rank every quarterback in each category and overall.
rankings = create_rankings(df=comparison_df)

# Section banner followed by the full ranking table
print(f"\n{'=' * 70}")
print("RANKINGS BY CATEGORY")
print('=' * 70)
print(rankings.to_string(index=False))

Part 4: Detailed Head-to-Head Comparison

def head_to_head(df: pd.DataFrame, qb1: str, qb2: str) -> str:
    """Build a side-by-side text report for two quarterbacks.

    Args:
        df: Comparison frame containing a ``name`` column and the stat
            columns printed below.
        qb1: Name of the quarterback shown in the left column.
        qb2: Name of the quarterback shown in the right column.

    Returns:
        A multi-line string: a stat table followed by an ``ADVANTAGES``
        section listing, per quarterback, the metrics in which they lead
        (``None`` when they lead in nothing).

    Raises:
        IndexError: If either name has no matching row in ``df``.
    """
    left = df.loc[df['name'] == qb1].iloc[0]
    right = df.loc[df['name'] == qb2].iloc[0]

    report = f"""
    {'='*60}
    HEAD-TO-HEAD: {qb1} vs {qb2}
    {'='*60}

    {'Statistic':<25} {qb1:>15} {qb2:>15}
    {'-'*55}
    {'Games':<25} {left['games']:>15} {right['games']:>15}
    {'Completions':<25} {left['completions']:>15} {right['completions']:>15}
    {'Attempts':<25} {left['attempts']:>15} {right['attempts']:>15}
    {'Completion %':<25} {left['completion_pct']:>14.1f}% {right['completion_pct']:>14.1f}%
    {'Passing Yards':<25} {left['yards']:>15} {right['yards']:>15}
    {'Yards/Attempt':<25} {left['yards_per_attempt']:>15.2f} {right['yards_per_attempt']:>15.2f}
    {'Touchdowns':<25} {left['touchdowns']:>15} {right['touchdowns']:>15}
    {'Interceptions':<25} {left['interceptions']:>15} {right['interceptions']:>15}
    {'TD-INT Ratio':<25} {left['td_int_ratio']:>15.2f} {right['td_int_ratio']:>15.2f}
    {'Passer Rating':<25} {left['passer_rating']:>15.1f} {right['passer_rating']:>15.1f}
    {'Total Yards':<25} {left['total_yards']:>15} {right['total_yards']:>15}
    {'Total TDs':<25} {left['total_tds']:>15} {right['total_tds']:>15}
    {'Team Wins':<25} {left['team_wins']:>15} {right['team_wins']:>15}

    ADVANTAGES:
    """

    # (column, label shown in the report, whether a larger value wins)
    scored_metrics = (
        ('completion_pct', 'Completion %', True),
        ('yards_per_attempt', 'Yards/Attempt', True),
        ('passer_rating', 'Passer Rating', True),
        ('td_int_ratio', 'TD-INT Ratio', True),
        ('total_yards', 'Total Yards', True),
        ('interceptions', 'Interceptions', False),
    )

    left_edges = []
    right_edges = []
    for column, label, larger_wins in scored_metrics:
        ours, theirs = left[column], right[column]
        if not larger_wins:
            # Swap the operands so "larger wins" applies uniformly below.
            ours, theirs = theirs, ours
        if ours > theirs:
            left_edges.append(label)
        elif theirs > ours:
            right_edges.append(label)
        # Equal values give neither quarterback the edge.

    left_summary = ', '.join(left_edges) or 'None'
    right_summary = ', '.join(right_edges) or 'None'

    return (
        report
        + f"\n    {qb1}: {left_summary}"
        + f"\n    {qb2}: {right_summary}"
    )


# Pit the two best-ranked quarterbacks against each other.
top_two = rankings['name'].head(2).tolist()
print(head_to_head(comparison_df, qb1=top_two[0], qb2=top_two[1]))

Part 5: The Verdict

def determine_winner(df: pd.DataFrame, rankings: pd.DataFrame) -> str:
    """Write up the top-ranked quarterback and the reasons for the pick.

    The winner is whoever occupies the first row of ``rankings``, so the
    frame is expected to be sorted best-first.

    Args:
        df: Comparison frame with a ``name`` column and the winner's stats.
        rankings: Ranking frame with ``name`` and the ``*_rank`` columns.

    Returns:
        A multi-line verdict: key statistics, the winner's rank in each
        category, and one bullet per justification that applies.

    Raises:
        IndexError: If ``rankings`` is empty or the winner is not in ``df``.
    """
    champion = rankings.iloc[0]['name']
    season = df[df['name'] == champion].iloc[0]
    # The first row of ``rankings`` is, by construction, the first row
    # carrying the champion's name, so no second lookup is required.
    places = rankings.iloc[0]

    verdict = f"""
    {'='*70}
    THE VERDICT: BEST QUARTERBACK
    {'='*70}

    WINNER: {champion}

    KEY STATISTICS:
    • Completion Percentage: {season['completion_pct']}%
    • Yards per Attempt: {season['yards_per_attempt']}
    • Passer Rating: {season['passer_rating']}
    • TD-to-INT Ratio: {season['td_int_ratio']}
    • Total Touchdowns: {season['total_tds']}
    • Team Record: {season['team_wins']} wins

    RANKINGS BREAKDOWN:
    • Passing Yards: #{int(places['yards_rank'])}
    • Touchdowns: #{int(places['tds_rank'])}
    • Completion %: #{int(places['comp_pct_rank'])}
    • Yards/Attempt: #{int(places['ypa_rank'])}
    • Passer Rating: #{int(places['rating_rank'])}
    • TD-INT Ratio: #{int(places['td_int_rank'])}

    JUSTIFICATION:
    {champion} emerges as the top quarterback due to the combination of:
    """

    # Each entry pairs a qualifying condition with the bullet it earns.
    candidate_reasons = [
        (places['rating_rank'] <= 2,
         "Elite efficiency as measured by passer rating"),
        (places['td_int_rank'] <= 2,
         "Excellent ball security with high TD-to-INT ratio"),
        (season['total_tds'] > 40,
         "Prolific scoring with over 40 total touchdowns"),
        (season['team_wins'] >= 12,
         f"Team success with {season['team_wins']} wins"),
    ]

    bullets = ''.join(
        f"\n    • {reason}" for applies, reason in candidate_reasons if applies
    )
    return verdict + bullets


# Produce the written verdict for the top-ranked quarterback and show it.
verdict = determine_winner(df=comparison_df, rankings=rankings)
print(verdict)

Part 6: Summary Visualization Table

def create_summary_table(df: pd.DataFrame, rankings: pd.DataFrame) -> str:
    """Create a final summary table.

    The frame, header and data rows are all derived from one column-width
    specification, so every line of the box has the same width. Previously
    the data rows' Rank cell was one character narrower than the header's,
    which shifted every column separator.

    Args:
        df: Comparison frame with ``name``, ``completion_pct``,
            ``yards_per_attempt``, ``passer_rating``, ``touchdowns``,
            ``interceptions``, ``td_int_ratio`` and ``team_wins``.
        rankings: Ranking frame with ``name`` and ``final_rank``.

    Returns:
        A boxed text table with one row per quarterback present in both
        frames, ordered by ``final_rank``. Each line is indented by four
        spaces. Names longer than 13 characters are not truncated and
        will widen their row.
    """
    merged = pd.merge(df, rankings[['name', 'final_rank']], on='name')
    merged = merged.sort_values('final_rank')

    # (heading, content width) per column; each cell is padded with one
    # space on either side and cells are separated by a single "│".
    columns = [
        ('Rank', 4), ('Name', 13), ('Comp%', 6), ('YPA', 4), ('Rating', 6),
        ('TDs', 3), ('INTs', 4), ('Ratio', 5), ('Wins', 4),
    ]
    inner_width = sum(width + 2 for _, width in columns) + len(columns) - 1
    indent = "    "
    rule = "═" * inner_width

    def framed(cells) -> str:
        """Join already-padded cell strings into one bordered table line."""
        return f"{indent}║ {' │ '.join(cells)} ║"

    title = (" " * 20 + "QUARTERBACK SEASON SUMMARY").ljust(inner_width)

    lines = [
        "",  # the table starts on a fresh line
        f"{indent}╔{rule}╗",
        f"{indent}║{title}║",
        f"{indent}╠{rule}╣",
        framed(f"{heading:<{width}}" for heading, width in columns),
        f"{indent}╠{rule}╣",
    ]

    for _, row in merged.iterrows():
        lines.append(framed([
            f"{int(row['final_rank']):^4d}",
            f"{row['name']:<13}",
            f"{row['completion_pct']:5.1f}%",
            f"{row['yards_per_attempt']:4.1f}",
            f"{row['passer_rating']:6.1f}",
            f"{int(row['touchdowns']):3d}",
            f" {int(row['interceptions']):2d} ",
            f"{row['td_int_ratio']:5.2f}",
            f" {int(row['team_wins']):2d} ",
        ]))

    lines.append(f"{indent}╚{rule}╝")
    lines.append(indent)  # trailing indented line, as in the original layout

    return "\n".join(lines)


# Render the boxed season summary for all quarterbacks and display it.
summary = create_summary_table(df=comparison_df, rankings=rankings)
print(summary)

Summary

This case study demonstrated how to:

  1. Calculate comprehensive QB metrics including efficiency rates and totals
  2. Create multi-factor rankings that balance volume and efficiency
  3. Perform head-to-head comparisons with clear advantage identification
  4. Make data-driven recommendations with statistical justification

Key Insights

  1. Volume vs. Efficiency: Raw totals can be misleading without per-attempt context
  2. Multiple Dimensions: The best QB excels across multiple categories
  3. Team Context: Individual stats should be viewed alongside team success
  4. Composite Metrics: Combining multiple rankings provides balanced evaluation

Exercises

  1. Add rushing statistics to the passer rating calculation
  2. Create an "adjusted" rating that accounts for strength of schedule
  3. Weight different categories differently in the composite ranking
  4. Add a "clutch" factor based on performance in close games