Case Study 1: Identifying Hidden Gems - QB Evaluation Beyond Traditional Stats

Overview

In this case study, you'll use advanced passing metrics to identify undervalued quarterbacks whose true performance is masked by traditional statistics. This scenario mirrors real NFL draft analysis where teams seek players whose skills exceed their raw numbers.


The Scenario

You're an analytics consultant for an NFL team's scouting department. The head scout presents you with data from four college quarterbacks, all projected as mid-round picks based on traditional statistics. Your task is to determine which QB might be significantly undervalued (or overvalued) using advanced metrics.


Part 1: The Data

import pandas as pd
import numpy as np
from typing import Dict, List, Tuple

# Season box-score totals for the four prospects, keyed by quarterback name.
# Each entry carries the counting stats plus team name and win-loss record.
qb_traditional = {
    'QB Adams': dict(
        games=13, completions=256, attempts=412,
        yards=3245, touchdowns=24, interceptions=11,
        team='Mid-Major A', team_record='8-5',
    ),
    'QB Baker': dict(
        games=14, completions=298, attempts=425,
        yards=3567, touchdowns=28, interceptions=9,
        team='Power 5 Team B', team_record='10-4',
    ),
    'QB Clark': dict(
        games=12, completions=212, attempts=358,
        yards=2876, touchdowns=22, interceptions=8,
        team='Power 5 Team C', team_record='7-5',
    ),
    'QB Davis': dict(
        games=13, completions=278, attempts=398,
        yards=3456, touchdowns=26, interceptions=12,
        team='Mid-Major D', team_record='9-4',
    ),
}

# Calculate traditional metrics
def calc_traditional(stats):
    """Derive the traditional rate statistics from a QB's raw season totals.

    Args:
        stats: Mapping with the numeric keys ``completions``, ``attempts``,
            ``yards``, ``touchdowns``, ``interceptions`` and ``games``.
            Any other keys (e.g. team name) are ignored.

    Returns:
        A dict with:
            comp_pct: completions per attempt, as a percentage (1 decimal).
            ypa:      yards per attempt (2 decimals).
            td_pct:   touchdowns per attempt, as a percentage (1 decimal).
            int_pct:  interceptions per attempt, as a percentage (1 decimal).
            ypg:      yards per game (1 decimal).
        A rate whose denominator is zero (no attempts, or no games played)
        is reported as 0.0 instead of raising ZeroDivisionError.

    Raises:
        KeyError: if one of the required keys is missing from ``stats``.
    """
    attempts = stats['attempts']
    games = stats['games']

    def ratio(numerator, denominator):
        # A QB with no attempts/games has no meaningful rate; report 0.0
        # so table rendering and scoring downstream keep working.
        return numerator / denominator if denominator else 0.0

    return {
        'comp_pct': round(ratio(stats['completions'], attempts) * 100, 1),
        'ypa': round(ratio(stats['yards'], attempts), 2),
        'td_pct': round(ratio(stats['touchdowns'], attempts) * 100, 1),
        'int_pct': round(ratio(stats['interceptions'], attempts) * 100, 1),
        'ypg': round(ratio(stats['yards'], games), 1),
    }

# Banner, column header and one formatted row per quarterback for the
# traditional rate statistics.
print("=" * 70, "TRADITIONAL STATISTICS", "=" * 70, sep="\n")
print(" ".join([
    'QB'.ljust(12),
    'Comp%'.rjust(8),
    'YPA'.rjust(6),
    'TD%'.rjust(6),
    'INT%'.rjust(6),
    'YPG'.rjust(8),
]))
print("-" * 70)

for qb, stats in qb_traditional.items():
    trad = calc_traditional(stats)
    # Percent columns are one character narrower than their headers so the
    # trailing '%' lines up under the header's right edge.
    print(" ".join([
        f"{qb:<12}",
        f"{trad['comp_pct']:>7.1f}%",
        f"{trad['ypa']:>6.2f}",
        f"{trad['td_pct']:>5.1f}%",
        f"{trad['int_pct']:>5.1f}%",
        f"{trad['ypg']:>8.1f}",
    ]))

Traditional Stats Output:

QB           Comp%      YPA   TD%   INT%      YPG
----------------------------------------------------------------------
QB Adams       62.1%   7.88  5.8%   2.7%    249.6
QB Baker       70.1%   8.39  6.6%   2.1%    254.8
QB Clark       59.2%   8.03  6.1%   2.2%    239.7
QB Davis       69.8%   8.68  6.5%   3.0%    265.8

Based on traditional stats alone, QB Baker appears to be the clear top prospect (highest completion percentage, good YPA, lowest INT%).


Part 2: Advanced Metrics Analysis

Now let's dig deeper with advanced metrics.

# Advanced, play-by-play derived metrics for the same four quarterbacks,
# keyed by quarterback name. Metric glossary:
#   adot                        average depth of target
#   cpoe                        completion % over expected
#   avg_epa                     EPA per dropback
#   pressure_rate               % of dropbacks under pressure
#   clean_pocket_comp           completion % in a clean pocket
#   pressure_comp               completion % under pressure
#   deep_pass_pct               % of passes travelling 20+ yards
#   deep_comp_pct               completion % on deep passes
#   opponent_avg_pass_def_rank  average pass-defense rank of opponents
qb_advanced = {
    'QB Adams': dict(
        adot=10.8,
        cpoe=4.2,
        avg_epa=0.18,
        pressure_rate=32.5,
        clean_pocket_comp=68.2,
        pressure_comp=41.5,
        deep_pass_pct=18.5,
        deep_comp_pct=42.1,
        air_yards_share=72.5,
        third_down_comp_pct=58.4,
        rz_td_pct=32.1,
        opponent_avg_pass_def_rank=48,
    ),
    'QB Baker': dict(
        adot=6.2,
        cpoe=-1.8,
        avg_epa=0.08,
        pressure_rate=22.4,
        clean_pocket_comp=72.8,
        pressure_comp=52.1,
        deep_pass_pct=8.2,
        deep_comp_pct=38.5,
        air_yards_share=48.2,
        third_down_comp_pct=62.1,
        rz_td_pct=45.2,
        opponent_avg_pass_def_rank=72,
    ),
    'QB Clark': dict(
        adot=11.2,
        cpoe=6.8,
        avg_epa=0.22,
        pressure_rate=35.8,
        clean_pocket_comp=72.5,
        pressure_comp=38.2,
        deep_pass_pct=22.4,
        deep_comp_pct=48.6,
        air_yards_share=78.4,
        third_down_comp_pct=52.8,
        rz_td_pct=38.5,
        opponent_avg_pass_def_rank=35,
    ),
    'QB Davis': dict(
        adot=7.8,
        cpoe=1.2,
        avg_epa=0.12,
        pressure_rate=24.2,
        clean_pocket_comp=74.2,
        pressure_comp=48.5,
        deep_pass_pct=12.8,
        deep_comp_pct=35.2,
        air_yards_share=58.2,
        third_down_comp_pct=65.4,
        rz_td_pct=42.1,
        opponent_avg_pass_def_rank=85,
    ),
}

# Banner (preceded by a blank line), column header and one row per
# quarterback for the headline advanced metrics.
print("\n" + "=" * 70, "ADVANCED METRICS", "=" * 70, sep="\n")
print(" ".join([
    'QB'.ljust(12),
    'aDOT'.rjust(6),
    'CPOE'.rjust(7),
    'EPA/DB'.rjust(8),
    'Pressure'.rjust(10),
    'Deep%'.rjust(7),
]))
print("-" * 70)

for qb, adv in qb_advanced.items():
    # CPOE is printed with an explicit sign because it is a delta relative
    # to expectation.
    print(" ".join([
        f"{qb:<12}",
        f"{adv['adot']:>6.1f}",
        f"{adv['cpoe']:>+6.1f}%",
        f"{adv['avg_epa']:>8.2f}",
        f"{adv['pressure_rate']:>9.1f}%",
        f"{adv['deep_pass_pct']:>6.1f}%",
    ]))

Advanced Metrics Output:

QB           aDOT    CPOE   EPA/DB   Pressure  Deep%
----------------------------------------------------------------------
QB Adams     10.8   +4.2%     0.18      32.5%  18.5%
QB Baker      6.2   -1.8%     0.08      22.4%   8.2%
QB Clark     11.2   +6.8%     0.22      35.8%  22.4%
QB Davis      7.8   +1.2%     0.12      24.2%  12.8%

Part 3: Key Insight Analysis

class QBAdvancedEvaluator:
    """Compare quarterbacks on traditional stats versus advanced metrics.

    Both inputs are dicts keyed by quarterback name:

    * ``traditional`` holds the raw season totals that ``calc_traditional``
      turns into rate stats.
    * ``advanced`` holds the play-by-play derived metrics read below
      (``cpoe``, ``avg_epa``, ``adot``, ``pressure_rate``, ...).
    """

    # Number of characters between the left and right borders of the
    # scouting-report box.
    _REPORT_WIDTH = 74

    def __init__(self, traditional: Dict, advanced: Dict):
        self.traditional = traditional
        self.advanced = advanced

    def identify_discrepancies(self) -> Dict:
        """Score every QB on a traditional and an advanced heuristic.

        Returns:
            Dict keyed by quarterback name. Each value holds
            ``traditional_rank_score``, ``advanced_rank_score`` and ``gap``
            (advanced minus traditional), all rounded to one decimal.

        The two scores are hand-weighted blends built from different inputs,
        so they are not on a shared scale; the gap is useful for comparing
        quarterbacks with each other rather than as an absolute quantity.

        Raises:
            KeyError: if a quarterback in ``traditional`` has no entry in
                ``advanced``.
        """
        discrepancies = {}

        for qb in self.traditional:
            trad = calc_traditional(self.traditional[qb])
            adv = self.advanced[qb]

            # Box-score view: rewards completion rate, yardage and touchdowns,
            # penalises interceptions.
            traditional_score = (
                trad['comp_pct'] * 0.3 +
                trad['ypa'] * 5 +
                trad['td_pct'] * 5 -
                trad['int_pct'] * 8
            )

            # Advanced view: accuracy over expectation, value per dropback,
            # depth of target, how rarely the QB is pressured, deep accuracy.
            advanced_score = (
                adv['cpoe'] * 3 +
                adv['avg_epa'] * 50 +
                adv['adot'] * 2 +
                (100 - adv['pressure_rate']) * 0.3 +
                adv['deep_comp_pct'] * 0.3
            )

            discrepancies[qb] = {
                'traditional_rank_score': round(traditional_score, 1),
                'advanced_rank_score': round(advanced_score, 1),
                'gap': round(advanced_score - traditional_score, 1)
            }

        return discrepancies

    @classmethod
    def _row(cls, text: str = "") -> str:
        """Return one boxed line, padded so the right border lines up.

        Text longer than the box is kept intact (never truncated), so only
        an over-long line can push the border out.
        """
        return f"║{text:<{cls._REPORT_WIDTH}}║"

    @classmethod
    def _rule(cls, left: str = "╠", right: str = "╣") -> str:
        """Return a horizontal border with the given corner/junction glyphs."""
        return f"{left}{'═' * cls._REPORT_WIDTH}{right}"

    @classmethod
    def _section(cls, title: str, bullets: list, fallback: str) -> list:
        """Return the lines of a titled bullet section.

        ``fallback`` is shown as the only bullet when ``bullets`` is empty so
        that no section is ever rendered blank.
        """
        lines = [cls._rule(), cls._row(f" {title}"), cls._rule()]
        lines.extend(cls._row(f" • {item}") for item in (bullets or [fallback]))
        return lines

    def generate_scouting_report(self, qb_name: str) -> str:
        """Generate a boxed, multi-section scouting report for one QB.

        Args:
            qb_name: Key present in both ``traditional`` and ``advanced``.

        Returns:
            The report as a single string. It starts with a newline and has
            no trailing newline; every line of the box has the same width.

        Raises:
            KeyError: if ``qb_name`` is missing from either input dict.
        """
        trad = calc_traditional(self.traditional[qb_name])
        adv = self.advanced[qb_name]

        strengths = []
        concerns = []

        # Accuracy relative to throw difficulty.
        if adv['cpoe'] > 3:
            strengths.append(f"Elite accuracy (+{adv['cpoe']}% CPOE) - completes difficult throws")
        elif adv['cpoe'] < -1:
            concerns.append(f"Below-expected accuracy ({adv['cpoe']}% CPOE)")

        # Passing style: how far downfield the average target is.
        if adv['adot'] > 9:
            strengths.append(f"Aggressive downfield passer (aDOT: {adv['adot']} yards)")
        elif adv['adot'] < 7:
            concerns.append(f"Relies heavily on short passes (aDOT: {adv['adot']} yards)")

        # Pressure handling is only judged for QBs who are pressured often.
        if adv['pressure_rate'] > 30 and adv['pressure_comp'] > 40:
            strengths.append(f"Handles pressure well ({adv['pressure_comp']}% under pressure)")
        elif adv['pressure_rate'] > 30 and adv['pressure_comp'] < 35:
            concerns.append(f"Struggles under heavy pressure ({adv['pressure_comp']}% under pressure)")

        if adv['deep_comp_pct'] > 45:
            strengths.append(f"Strong deep ball ({adv['deep_comp_pct']}% on 20+ yard passes)")

        if adv['avg_epa'] > 0.15:
            strengths.append(f"High-value passer ({adv['avg_epa']:.2f} EPA/dropback)")

        # Schedule context: a lower average opponent rank is treated as a
        # tougher slate of pass defenses.
        context = []
        if adv['opponent_avg_pass_def_rank'] < 50:
            context.append(f"Faced tough defenses (avg rank: {adv['opponent_avg_pass_def_rank']})")
        elif adv['opponent_avg_pass_def_rank'] > 70:
            context.append(f"Weaker schedule (avg def rank: {adv['opponent_avg_pass_def_rank']})")

        width = self._REPORT_WIDTH
        lines = [
            self._rule("╔", "╗"),
            self._row(f"{'ADVANCED SCOUTING REPORT':^{width}}"),
            self._row(f"{qb_name:^{width}}"),
            self._rule(),
            self._row(" TRADITIONAL PROFILE"),
            self._rule(),
            self._row(
                f" Completion%: {trad['comp_pct']:>5.1f}%   YPA: {trad['ypa']:>5.2f}"
                f"   TD%: {trad['td_pct']:>5.1f}%   INT%: {trad['int_pct']:>5.1f}%"
            ),
            self._rule(),
            self._row(" ADVANCED PROFILE"),
            self._rule(),
            self._row(
                f" CPOE: {adv['cpoe']:>+5.1f}%   aDOT: {adv['adot']:>5.1f}"
                f"   EPA/DB: {adv['avg_epa']:>5.2f}"
                f"   Air Yards%: {adv['air_yards_share']:>5.1f}%"
            ),
            self._row(
                f" Pressure Rate: {adv['pressure_rate']:>5.1f}%"
                f"   Clean Pocket: {adv['clean_pocket_comp']:>5.1f}%"
                f"   Under Pressure: {adv['pressure_comp']:>5.1f}%"
            ),
            self._row(
                f" Deep Pass%: {adv['deep_pass_pct']:>5.1f}%"
                f"   Deep Completion%: {adv['deep_comp_pct']:>5.1f}%"
            ),
        ]
        lines += self._section("STRENGTHS", strengths, "No standout strengths identified")
        lines += self._section("CONCERNS", concerns, "No major concerns identified")
        lines += self._section("CONTEXT", context, "No notable schedule context")
        lines.append(self._rule("╚", "╝"))

        return "\n" + "\n".join(lines)


# Build the evaluator from the two data sets defined earlier.
evaluator = QBAdvancedEvaluator(qb_traditional, qb_advanced)

# Perception-gap table: largest positive gap (most undervalued by the
# box score) first.
print("\n" + "=" * 70, "TRADITIONAL vs ADVANCED PERCEPTION GAP", "=" * 70, sep="\n")

discrepancies = evaluator.identify_discrepancies()
for qb in sorted(discrepancies, key=lambda name: discrepancies[name]['gap'], reverse=True):
    gap = discrepancies[qb]
    print(
        f"{qb}: "
        f"Traditional Score: {gap['traditional_rank_score']:.1f}, "
        f"Advanced Score: {gap['advanced_rank_score']:.1f}, "
        f"Gap: {gap['gap']:+.1f}"
    )

Part 4: The Hidden Gem

Based on our analysis, QB Clark emerges as the hidden gem:

# Render and display the full boxed scouting report for QB Clark.
report = evaluator.generate_scouting_report(qb_name='QB Clark')
print(report)

Key Findings for QB Clark:

  1. Highest CPOE (+6.8%): Despite having the lowest raw completion percentage (59.2%), Clark's accuracy relative to throw difficulty is elite

  2. Highest aDOT (11.2 yards): He attempts the most aggressive throws, explaining his lower raw completion percentage

  3. Highest EPA per dropback (0.22): Creates the most value per pass play

  4. Best deep ball (48.6%): Elite completion rate on 20+ yard passes

  5. Toughest schedule: Faced opponents with average pass defense rank of 35

  6. Poor offensive line: 35.8% pressure rate (highest) explains some struggles

Why Traditional Stats Miss This:

  • Low completion percentage punishes aggressive throws
  • Playing behind a poor line affects volume and consistency
  • Tough schedule depresses raw numbers
  • Traditional stats don't credit throw difficulty

Part 5: The Verdict

def create_final_ranking(evaluator: QBAdvancedEvaluator) -> pd.DataFrame:
    """Rank every QB on a traditional score and an advanced composite.

    Args:
        evaluator: Object exposing ``traditional`` and ``advanced`` dicts,
            both keyed by quarterback name.

    Returns:
        DataFrame sorted by ``advanced_rank`` with the columns
        ``quarterback``, ``traditional_rank``, ``advanced_composite``,
        ``projection``, ``advanced_rank`` and ``movement``. ``movement`` is
        ``traditional_rank - advanced_rank``, so a positive value means the
        QB is undervalued by traditional stats. An evaluator with no
        quarterbacks yields an empty frame with those columns.

    Raises:
        KeyError: if a quarterback in ``traditional`` has no entry in
            ``advanced``.
    """
    columns = ['quarterback', 'traditional_rank', 'advanced_composite',
               'projection', 'advanced_rank', 'movement']
    if not evaluator.traditional:
        return pd.DataFrame(columns=columns)

    composites = {}
    trad_scores = {}
    for qb in evaluator.traditional:
        adv = evaluator.advanced[qb]
        t = calc_traditional(evaluator.traditional[qb])

        # Composite score weighing advanced metrics heavily
        composites[qb] = (
            adv['cpoe'] * 2.5 +          # Weight accuracy over expected
            adv['avg_epa'] * 30 +         # Value creation
            adv['adot'] * 1.5 +           # Aggressiveness
            adv['deep_comp_pct'] * 0.3 +  # Deep ball ability
            (40 - adv['opponent_avg_pass_def_rank'] / 3) +  # Schedule adjustment
            50  # Base score
        )

        # Simple box-score blend standing in for a passer-rating style number
        trad_scores[qb] = (
            t['comp_pct'] * 0.5 + t['ypa'] * 5 + t['td_pct'] * 3 - t['int_pct'] * 5
        )

    # Traditional rank is ordinal: sorted() is stable, so QBs with equal
    # scores keep their input order and still receive distinct ranks.
    by_trad_score = sorted(trad_scores, key=trad_scores.get, reverse=True)
    trad_rank = {qb: position for position, qb in enumerate(by_trad_score, start=1)}

    df = pd.DataFrame([
        {
            'quarterback': qb,
            'traditional_rank': trad_rank[qb],
            'advanced_composite': round(composites[qb], 1),
            'projection': '',
        }
        for qb in evaluator.traditional
    ])

    # method='min' gives tied composites the same whole-number rank. The
    # default ('average') produces fractional ranks that astype(int) would
    # silently truncate (e.g. a three-way tie for first would become 2, 2, 2).
    df['advanced_rank'] = (
        df['advanced_composite'].rank(ascending=False, method='min').astype(int)
    )

    # Movement (positive = undervalued by traditional)
    df['movement'] = df['traditional_rank'] - df['advanced_rank']

    def get_projection(row):
        """Translate a row's advanced rank and movement into a verdict."""
        if row['advanced_rank'] == 1 and row['movement'] > 0:
            return "HIDDEN GEM - Significant undervaluation"
        elif row['advanced_rank'] == 1:
            return "Elite prospect"
        elif row['movement'] > 1:
            return "Undervalued - Worth closer look"
        elif row['movement'] < -1:
            return "Overvalued - Proceed with caution"
        else:
            return "Appropriately valued"

    df['projection'] = df.apply(get_projection, axis=1)

    # Stable sort so QBs sharing an advanced rank keep their input order.
    return df.sort_values('advanced_rank', kind='mergesort')


# Build the ranking table and print the columns relevant to the verdict,
# without the DataFrame index.
final_ranking = create_final_ranking(evaluator)

print("\n" + "=" * 80, "FINAL QUARTERBACK RANKINGS", "=" * 80, sep="\n")
print(
    final_ranking.loc[
        :,
        ['quarterback', 'traditional_rank', 'advanced_rank', 'movement', 'projection'],
    ].to_string(index=False)
)

Final Output:

FINAL QUARTERBACK RANKINGS
================================================================================
quarterback  traditional_rank  advanced_rank  movement  projection
QB Clark                    3              1         2  HIDDEN GEM - Significant undervaluation
QB Adams                    4              2         2  Undervalued - Worth closer look
QB Davis                    2              3        -1  Appropriately valued
QB Baker                    1              4        -3  Overvalued - Proceed with caution

Summary

This case study demonstrated how advanced passing metrics reveal information hidden by traditional statistics:

  1. CPOE adjusts for throw difficulty, revealing true accuracy
  2. aDOT shows passing style and aggressiveness
  3. EPA measures actual value created, not just yards
  4. Pressure metrics separate QB skill from offensive line quality
  5. Schedule adjustment contextualizes raw statistics

Key Takeaway

QB Clark, who has the lowest raw completion percentage of the four and ranks only third in the traditional composite, emerges as the top prospect when accounting for throw difficulty, value creation, and situational context. This is exactly the type of edge that advanced analytics provides in talent evaluation.