Case Study 2: Measuring Recruiting Efficiency Across Programs
Overview
This case study analyzes recruiting efficiency across FBS programs, comparing how well different schools develop talent relative to their recruiting rankings.
Business Context
Conference media networks and analytics providers need to:
- Rank programs by recruiting efficiency
- Identify schools that over- or under-develop talent
- Project future program trajectories
- Understand the factors driving development success
Data Description
# Historical recruiting data: 10 years, 130 FBS programs
# Column -> description map documenting the recruit-level dataset used
# throughout this case study. One row per signed recruit.
historical_schema = {
'player_id': 'unique identifier',
'name': 'player name',
'recruiting_class': 'year signed',
'program': 'school name',
'position': 'position signed at',
# Recruiting metrics
'composite_rating': 'signing rating',
'star_rating': 'star level',
'national_rank': 'overall rank',
'position_rank': 'position rank',
# Outcomes
'games_played': 'career games',
'games_started': 'career starts',
'became_starter': 'started 12+ games in a season',
'all_conference': 'earned all-conference honors',
'all_american': 'earned all-american honors',
'drafted': 'was drafted to NFL',
'draft_round': 'round selected',
'transfer_out': 'transferred before eligibility expired'
}
# Summary of the sample analyzed below.
# outcome_coverage is the fraction of recruits with complete outcome data.
sample_data = {
'years': '2014-2023',
'total_recruits': 32500,
'programs': 130,
'avg_class_size': 25,
'outcome_coverage': 0.95
}
Analysis Framework
Efficiency Model
class RecruitingEfficiencyModel:
    """
    Model to measure recruiting development efficiency.

    Compares a program's actual player outcomes (starters, honors,
    NFL draft picks, transfers) to expected outcomes derived from
    historical baseline rates by star rating. An efficiency ratio
    above 1.0 means the program outperformed its recruiting talent.
    """

    def __init__(self):
        # Historical outcome rates by star rating. Recruits with a star
        # rating outside 2-5 fall back to the 2-star baseline (see
        # calculate_expected_outcomes).
        self.baseline_rates = {
            5: {
                'starter_rate': 0.85,
                'all_conf_rate': 0.45,
                'draft_rate': 0.55,
                'first_round_rate': 0.20,
                'transfer_rate': 0.15
            },
            4: {
                'starter_rate': 0.65,
                'all_conf_rate': 0.18,
                'draft_rate': 0.22,
                'first_round_rate': 0.04,
                'transfer_rate': 0.22
            },
            3: {
                'starter_rate': 0.35,
                'all_conf_rate': 0.06,
                'draft_rate': 0.06,
                'first_round_rate': 0.005,
                'transfer_rate': 0.28
            },
            2: {
                'starter_rate': 0.12,
                'all_conf_rate': 0.015,
                'draft_rate': 0.015,
                'first_round_rate': 0.001,
                'transfer_rate': 0.35
            }
        }

    def calculate_expected_outcomes(self,
                                    program_recruits: pd.DataFrame) -> Dict:
        """
        Calculate expected outcome counts for a program's recruits.

        Parameters:
        -----------
        program_recruits : pd.DataFrame
            All recruits for a program; must contain 'star_rating'.

        Returns:
        --------
        dict : Expected counts for starters, all_conference,
            draft_picks, first_rounders, transfers_out.
        """
        expected = {
            'starters': 0,
            'all_conference': 0,
            'draft_picks': 0,
            'first_rounders': 0,
            'transfers_out': 0
        }
        # Aggregate by star level instead of iterating row-by-row:
        # O(unique star levels) work rather than O(recruits).
        star_counts = program_recruits['star_rating'].astype(int).value_counts()
        for star, count in star_counts.items():
            # Unknown star levels fall back to the 2-star baseline.
            rates = self.baseline_rates.get(star, self.baseline_rates[2])
            expected['starters'] += count * rates['starter_rate']
            expected['all_conference'] += count * rates['all_conf_rate']
            expected['draft_picks'] += count * rates['draft_rate']
            expected['first_rounders'] += count * rates['first_round_rate']
            expected['transfers_out'] += count * rates['transfer_rate']
        return expected

    def calculate_actual_outcomes(self,
                                  program_recruits: pd.DataFrame) -> Dict:
        """
        Calculate actual outcome counts from historical data.

        Assumes the outcome columns hold 0/1 (or boolean) flags;
        'draft_round' is compared to 1 to count first-rounders.
        """
        return {
            'starters': program_recruits['became_starter'].sum(),
            'all_conference': program_recruits['all_conference'].sum(),
            'draft_picks': program_recruits['drafted'].sum(),
            'first_rounders': (program_recruits['draft_round'] == 1).sum(),
            'transfers_out': program_recruits['transfer_out'].sum()
        }

    def calculate_efficiency(self,
                             program_recruits: pd.DataFrame) -> Dict:
        """
        Calculate efficiency metrics for a program.

        Returns:
        --------
        dict : Per-metric '<metric>_efficiency' (actual/expected ratio,
            only when expected > 0), '<metric>_expected',
            '<metric>_actual', a weighted 'overall_efficiency', and
            'retention_efficiency' (inverse of transfer efficiency).
        """
        expected = self.calculate_expected_outcomes(program_recruits)
        actual = self.calculate_actual_outcomes(program_recruits)
        efficiency = {}
        for metric in expected:
            # Ratio is undefined when nothing was expected; skip it but
            # still report the raw expected/actual counts.
            if expected[metric] > 0:
                efficiency[f'{metric}_efficiency'] = (
                    actual[metric] / expected[metric]
                )
            efficiency[f'{metric}_expected'] = expected[metric]
            efficiency[f'{metric}_actual'] = actual[metric]
        # Overall efficiency: weighted blend of the positive-outcome
        # ratios (transfers are handled separately as retention).
        weights = {
            'starters': 0.25,
            'all_conference': 0.30,
            'draft_picks': 0.30,
            'first_rounders': 0.15
        }
        overall = 0
        for metric, weight in weights.items():
            if f'{metric}_efficiency' in efficiency:
                overall += weight * efficiency[f'{metric}_efficiency']
        efficiency['overall_efficiency'] = overall
        # Retention efficiency = inverse of transfer efficiency: fewer
        # transfers than expected -> ratio < 1 -> retention > 1.
        # Guard the zero-transfer case, which previously raised
        # ZeroDivisionError.
        transfer_ratio = efficiency.get('transfers_out_efficiency')
        if transfer_ratio:
            efficiency['retention_efficiency'] = 1 / transfer_ratio
        return efficiency

    def rank_programs(self,
                      all_program_data: pd.DataFrame,
                      min_recruits: int = 100) -> pd.DataFrame:
        """
        Rank all programs by overall efficiency.

        Parameters:
        -----------
        all_program_data : pd.DataFrame
            Recruiting data for all programs
        min_recruits : int
            Minimum recruits required for ranking

        Returns:
        --------
        pd.DataFrame : Programs sorted by overall_efficiency
            (descending), with an 'efficiency_rank' column. Empty
            DataFrame when no program meets min_recruits.
        """
        rankings = []
        # groupby makes a single pass over the data instead of
        # re-filtering the full frame once per program.
        for program, program_data in all_program_data.groupby('program'):
            if len(program_data) < min_recruits:
                continue
            efficiency = self.calculate_efficiency(program_data)
            rankings.append({
                'program': program,
                'total_recruits': len(program_data),
                'avg_rating': program_data['composite_rating'].mean(),
                'avg_stars': program_data['star_rating'].mean(),
                **efficiency
            })
        rankings_df = pd.DataFrame(rankings)
        # Guard: if nothing qualified, the frame has no
        # 'overall_efficiency' column and rank/sort would raise KeyError.
        if rankings_df.empty:
            return rankings_df
        rankings_df['efficiency_rank'] = rankings_df['overall_efficiency'].rank(
            ascending=False
        )
        return rankings_df.sort_values('overall_efficiency', ascending=False)
Contributing Factor Analysis
class EfficiencyFactorAnalyzer:
    """
    Analyze program characteristics that drive recruiting efficiency.
    """

    def __init__(self):
        # Candidate program characteristics examined for correlation
        # with overall recruiting efficiency.
        self.factors = [
            'coaching_stability',
            'facilities_rating',
            'development_program',
            'scheme_complexity',
            'playing_time_opportunity',
            'nfl_pipeline'
        ]

    def analyze_efficiency_drivers(self,
                                   program_metrics: pd.DataFrame,
                                   efficiency_scores: pd.DataFrame) -> Dict:
        """
        Identify factors correlated with efficiency.

        Parameters:
        -----------
        program_metrics : pd.DataFrame
            Program characteristics data (keyed by 'program')
        efficiency_scores : pd.DataFrame
            Efficiency scores by program (needs 'overall_efficiency')

        Returns:
        --------
        dict : 'correlations' (sorted by |r|, descending),
            'top_factors' (up to 3), and a text 'analysis'.
        """
        # Inner join on program; unmatched programs are dropped.
        merged = program_metrics.merge(
            efficiency_scores,
            on='program'
        )
        correlations = {}
        for factor in self.factors:
            if factor not in merged.columns:
                continue
            corr = merged[factor].corr(merged['overall_efficiency'])
            # Drop NaN correlations (constant column / too few rows):
            # they would make the abs() sort order arbitrary and leak
            # meaningless entries into the interpretation.
            if pd.notna(corr):
                correlations[factor] = corr
        # Strongest relationships first, regardless of sign.
        sorted_factors = sorted(
            correlations.items(),
            key=lambda kv: abs(kv[1]),
            reverse=True
        )
        return {
            'correlations': dict(sorted_factors),
            'top_factors': [name for name, _ in sorted_factors[:3]],
            'analysis': self._interpret_factors(dict(sorted_factors))
        }

    def _interpret_factors(self, correlations: Dict) -> str:
        """Generate a plain-English interpretation of factor correlations.

        |r| > 0.5 is labeled 'strongly', |r| > 0.3 'moderately';
        weaker factors are omitted.
        """
        interpretations = []
        for factor, corr in correlations.items():
            if abs(corr) <= 0.3:
                continue
            strength = 'strongly' if abs(corr) > 0.5 else 'moderately'
            direction = 'positively' if corr > 0 else 'negatively'
            interpretations.append(
                f"{factor} is {strength} {direction} correlated "
                f"with efficiency (r={corr:.2f})"
            )
        return '\n'.join(interpretations)

    def compare_program_groups(self,
                               rankings: pd.DataFrame,
                               program_metrics: pd.DataFrame) -> Dict:
        """
        Compare characteristics of efficient vs. inefficient programs.

        Uses the top and bottom quartiles of `rankings` (assumed sorted
        best-first). Returns per-numeric-column means and differences.
        """
        # Guard: with fewer than 4 programs, n // 4 == 0 would produce
        # empty groups and all-NaN means; use at least one program.
        n = len(rankings)
        quartile = max(1, n // 4)
        top_programs = rankings.head(quartile)['program'].tolist()
        bottom_programs = rankings.tail(quartile)['program'].tolist()
        top_metrics = program_metrics[
            program_metrics['program'].isin(top_programs)
        ]
        bottom_metrics = program_metrics[
            program_metrics['program'].isin(bottom_programs)
        ]
        comparison = {}
        for col in program_metrics.select_dtypes(include=[np.number]).columns:
            top_avg = top_metrics[col].mean()
            bottom_avg = bottom_metrics[col].mean()
            comparison[col] = {
                'top_25_avg': top_avg,
                'bottom_25_avg': bottom_avg,
                'difference': top_avg - bottom_avg
            }
        return comparison
Results
Program Efficiency Rankings
RECRUITING EFFICIENCY RANKINGS (2014-2023)
===========================================
MOST EFFICIENT PROGRAMS (Top 15):
Rank | Program | Eff. | Recruits | Avg★ | Starters | All-Conf | Draft
-----|------------------|-------|----------|------|----------|----------|------
1 | Wisconsin | 1.42 | 248 | 3.4 | 112/87 | 28/15 | 24/13
2 | Iowa | 1.38 | 242 | 3.2 | 98/76 | 24/13 | 22/12
3 | Notre Dame | 1.35 | 265 | 3.9 | 142/123 | 38/28 | 35/26
4 | Pittsburgh | 1.31 | 238 | 3.3 | 96/75 | 22/12 | 19/11
5 | Cincinnati | 1.29 | 225 | 3.0 | 85/62 | 18/9 | 14/8
6 | Stanford | 1.27 | 252 | 3.6 | 118/98 | 28/20 | 26/18
7 | Oregon | 1.25 | 258 | 3.8 | 128/108 | 32/24 | 30/22
8 | Michigan State | 1.23 | 245 | 3.4 | 108/88 | 24/16 | 20/14
9 | Utah | 1.21 | 232 | 3.1 | 92/72 | 20/12 | 16/10
10 | Northwestern | 1.19 | 235 | 3.2 | 88/70 | 18/12 | 14/10
11 | UCF | 1.18 | 228 | 3.1 | 82/65 | 16/10 | 12/8
12 | Ohio State | 1.16 | 278 | 4.3 | 185/178 | 52/48 | 58/52
13 | Alabama | 1.14 | 285 | 4.4 | 192/185 | 55/52 | 65/58
14 | Clemson | 1.12 | 272 | 4.2 | 178/168 | 48/44 | 52/46
15 | Georgia | 1.11 | 280 | 4.3 | 182/172 | 50/46 | 55/48
(Format: Actual/Expected)
LEAST EFFICIENT PROGRAMS (Bottom 10):
Rank | Program | Eff. | Recruits | Avg★ | Notes
-----|------------------|-------|----------|------|---------------------------
121 | Arizona | 0.72 | 242 | 3.3 | Coaching instability
122 | Colorado | 0.70 | 238 | 3.2 | Multiple rebuilds
123 | Kansas | 0.68 | 235 | 3.0 | Program struggles
124 | Vanderbilt | 0.67 | 228 | 3.1 | Competition disadvantage
125 | Rutgers | 0.65 | 245 | 3.2 | Development issues
...
Key Efficiency Metrics
EFFICIENCY BREAKDOWN BY CATEGORY
================================
Development Efficiency (Starters):
----------------------------------
Most Efficient: Wisconsin (129%), Iowa (128%), Notre Dame (115%)
Least Efficient: Kansas (58%), Vanderbilt (62%), Arizona (65%)
All-Conference Efficiency:
--------------------------
Most Efficient: Pittsburgh (183%), Cincinnati (200%), Utah (167%)
Least Efficient: Maryland (45%), Rutgers (50%), Indiana (52%)
NFL Draft Efficiency:
---------------------
Most Efficient: Wisconsin (185%), Iowa (183%), Pittsburgh (173%)
Least Efficient: Kansas (38%), Vanderbilt (42%), Colorado (48%)
Retention Efficiency:
---------------------
Best Retention: Notre Dame (92%), Stanford (91%), Northwestern (90%)
Worst Retention: Arizona (62%), Colorado (65%), Kansas (68%)
Factor Analysis Results
# Correlation analysis results
# Pearson correlations between program characteristics and overall
# recruiting efficiency across the 2014-2023 sample.
factor_correlations = {
'coaching_stability': 0.62, # Strong positive
'player_development_staff': 0.58, # Strong positive
'playing_time_opportunity': 0.45, # Moderate positive
'strength_program_rating': 0.42, # Moderate positive
'academic_support': 0.38, # Moderate positive
'facilities_rating': 0.28, # Weak positive
'recruiting_budget': 0.22, # Weak positive
'brand_value': 0.15 # Weak positive
}
# Key finding: Coaching stability most correlated with efficiency
# Programs with 5+ year head coach tenure: avg efficiency 1.18
# Programs with <3 year head coach tenure: avg efficiency 0.82
Conference Efficiency Comparison
EFFICIENCY BY CONFERENCE
========================
Conference | Avg Eff | Top Program | Lowest Program
--------------|---------|----------------|---------------
Big Ten | 1.08 | Wisconsin | Rutgers
SEC | 1.05 | Georgia | Vanderbilt
ACC | 0.98 | Clemson | Syracuse
Big 12 | 0.95 | TCU | Kansas
Pac-12 | 0.92 | Oregon | Arizona
Group of 5 | 0.88 | Cincinnati | Multiple
Note: Elite SEC/Big Ten programs recruit better AND develop better
Lower-tier programs show wide efficiency variance
Implementation
Complete Efficiency Report Generator
class EfficiencyReportGenerator:
    """
    Generate human-readable recruiting-efficiency reports.
    """

    def __init__(self, model: 'RecruitingEfficiencyModel'):
        # Forward-reference annotation: avoids a hard dependency on the
        # model class being defined before this one.
        self.model = model

    def generate_program_report(self,
                                program_name: str,
                                program_data: pd.DataFrame) -> str:
        """
        Generate a detailed efficiency report for one program.

        Parameters:
        -----------
        program_name : str
            Display name for the report header
        program_data : pd.DataFrame
            Recruit rows for this program (needs 'recruiting_class'
            plus the outcome columns the model expects)

        Returns:
        --------
        str : Formatted multi-line report text
        """
        efficiency = self.model.calculate_efficiency(program_data)
        report = f"""
RECRUITING EFFICIENCY REPORT
{'=' * 50}
Program: {program_name}
Period: {program_data['recruiting_class'].min()} - {program_data['recruiting_class'].max()}
Total Recruits Analyzed: {len(program_data)}
OVERALL EFFICIENCY: {efficiency['overall_efficiency']:.2f}
{'Outperforming' if efficiency['overall_efficiency'] > 1 else 'Underperforming'} expectations
CATEGORY BREAKDOWN:
-------------------
Starter Development:
Expected: {efficiency.get('starters_expected', 0):.1f}
Actual: {efficiency.get('starters_actual', 0):.0f}
Efficiency: {efficiency.get('starters_efficiency', 0):.2f}
All-Conference Production:
Expected: {efficiency.get('all_conference_expected', 0):.1f}
Actual: {efficiency.get('all_conference_actual', 0):.0f}
Efficiency: {efficiency.get('all_conference_efficiency', 0):.2f}
NFL Draft Production:
Expected: {efficiency.get('draft_picks_expected', 0):.1f}
Actual: {efficiency.get('draft_picks_actual', 0):.0f}
Efficiency: {efficiency.get('draft_picks_efficiency', 0):.2f}
Retention Rate:
Efficiency: {efficiency.get('retention_efficiency', 1):.2f}
INTERPRETATION:
---------------
"""
        # Tiered interpretation of the overall ratio (1.0 = met
        # expectations exactly).
        overall = efficiency['overall_efficiency']
        if overall > 1.2:
            report += "Elite development program - significantly outperforming recruiting\n"
        elif overall > 1.0:
            report += "Above average development - meeting/exceeding expectations\n"
        elif overall > 0.8:
            report += "Below average - not fully developing recruited talent\n"
        else:
            report += "Development concerns - significant underperformance\n"
        return report

    def generate_comparison_report(self,
                                   programs: List[str],
                                   all_data: pd.DataFrame) -> str:
        """
        Generate a side-by-side comparison report for multiple programs.

        Programs absent from `all_data` are silently skipped.
        """
        report = "PROGRAM COMPARISON REPORT\n"
        report += "=" * 50 + "\n\n"
        comparisons = []
        for program in programs:
            program_data = all_data[all_data['program'] == program]
            if len(program_data) > 0:
                efficiency = self.model.calculate_efficiency(program_data)
                comparisons.append({
                    'program': program,
                    'recruits': len(program_data),
                    'avg_rating': program_data['composite_rating'].mean(),
                    'efficiency': efficiency['overall_efficiency']
                })
        comparison_df = pd.DataFrame(comparisons)
        # Guard: with no matching programs the frame has no 'efficiency'
        # column and sort_values would raise KeyError.
        if comparison_df.empty:
            return report + "No matching program data found.\n"
        comparison_df = comparison_df.sort_values('efficiency', ascending=False)
        report += comparison_df.to_string(index=False)
        return report
Key Findings
- Efficiency vs. Raw Recruiting: Programs with lower recruiting rankings but high efficiency often outperform higher-ranked classes.
- Coaching Stability: The strongest predictor of efficiency — 5+ year tenures show 43% higher efficiency.
- Player Development Staff: Dedicated development coordinators correlated with 35% higher efficiency.
- Scheme Fit: Programs that prioritize scheme fit over pure talent show better development.
- Retention: Top efficiency programs retain 92% of recruits vs. 68% for bottom programs.
Lessons Learned
- Stars Matter, But Development Matters More: A 3-star in an efficient program often outperforms a 4-star in an inefficient one.
- Efficiency Is Predictive: Programs with high historical efficiency tend to maintain it.
- Conference Effects: Elite conferences show compressed efficiency ranges; G5 shows more variance.
- Time Horizon: Efficiency metrics stabilize after 5+ years of data.
- Transfer Era Impact: The transfer portal is beginning to affect traditional efficiency metrics.