Case Study 1: Building a Bowl Game Prediction System
Overview
This case study develops a complete machine learning system to predict college football bowl game outcomes. We'll work through the entire ML workflow from problem definition to deployment, addressing the unique challenges of predicting high-stakes postseason games.
Background
Bowl games present unique prediction challenges:

- Long layoffs between regular season and bowl games (3-6 weeks)
- Player opt-outs by NFL draft prospects
- Coaching changes and interim coaches
- Motivation disparities between teams
- Limited historical data for specific matchups
Business Problem
A sports media company wants to build a bowl game prediction system for their website. Requirements:

1. Predict the winner for all 40+ bowl games
2. Provide win probabilities with confidence intervals
3. Update predictions as new information emerges
4. Explain predictions in human-understandable terms
5. Track prediction accuracy for credibility
Solution Architecture
┌─────────────────────────────────────────────────────────────────┐
│ Bowl Prediction System │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Historical │───▶│ Feature │───▶│ Model │ │
│ │ Data │ │ Engineering │ │ Training │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Live │───▶│ Prediction │───▶│ Output │ │
│ │ Updates │ │ Engine │ │ & Reports │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
Implementation
Part 1: Data Collection and Preparation
"""
Bowl Game Prediction System
Part 1: Data Collection and Preparation
"""
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import List, Dict, Optional
from datetime import datetime
@dataclass
class TeamSeason:
    """Team statistics for a season.

    Aggregated regular-season metrics for one team, used as raw model
    inputs for bowl game prediction. Keyed elsewhere by (team, season).
    """
    team: str                       # team name
    season: int                     # season year
    conference: str                 # conference label (e.g. 'Power 5')
    wins: int                       # regular season wins
    losses: int                     # regular season losses
    offensive_epa: float            # offensive efficiency metric (higher is better)
    defensive_epa: float            # defensive efficiency metric (lower is better)
    sp_plus_rating: float           # overall SP+ team rating
    strength_of_schedule: float     # schedule-strength rating (loaded from 'sos' column)
    turnover_margin: float          # turnover margin — presumably per-game average; TODO confirm units
    third_down_pct: float           # third-down conversion rate, 0-1 scale
    red_zone_pct: float             # red-zone scoring rate, 0-1 scale
@dataclass
class BowlGame:
    """Bowl game data.

    Scores default to None for games that have not been played yet;
    opt-out counts and coach-change flags capture bowl-specific
    disruptions between the regular season and the bowl.
    """
    bowl_name: str
    season: int
    date: datetime
    team1: str
    team2: str
    team1_score: Optional[int] = None   # None until a result is recorded
    team2_score: Optional[int] = None   # None until a result is recorded
    team1_opt_outs: int = 0             # count of team1 players opting out
    team2_opt_outs: int = 0             # count of team2 players opting out
    team1_coach_change: bool = False    # True if team1 had a coaching change before the bowl
    team2_coach_change: bool = False    # True if team2 had a coaching change before the bowl
class BowlDataPipeline:
    """Data pipeline for bowl game prediction.

    Loads historical team statistics and bowl results, then assembles a
    feature matrix suitable for model training.

    Attributes
    ----------
    team_stats : dict
        Maps (team, season) -> TeamSeason.
    bowl_games : list of BowlGame
        Historical (and possibly upcoming) bowl games.
    """

    def __init__(self):
        self.team_stats = {}
        self.bowl_games = []

    def load_team_stats(self, filepath: str) -> None:
        """Load historical team statistics from a CSV file.

        Expected columns: team, season, conference, wins, losses,
        offensive_epa, defensive_epa, sp_plus_rating, sos,
        turnover_margin, third_down_pct, red_zone_pct.
        """
        df = pd.read_csv(filepath)
        for _, row in df.iterrows():
            key = (row['team'], row['season'])
            self.team_stats[key] = TeamSeason(
                team=row['team'],
                season=row['season'],
                conference=row['conference'],
                wins=row['wins'],
                losses=row['losses'],
                offensive_epa=row['offensive_epa'],
                defensive_epa=row['defensive_epa'],
                sp_plus_rating=row['sp_plus_rating'],
                strength_of_schedule=row['sos'],
                turnover_margin=row['turnover_margin'],
                third_down_pct=row['third_down_pct'],
                red_zone_pct=row['red_zone_pct']
            )

    def load_bowl_history(self, filepath: str) -> None:
        """Load historical bowl game results from a CSV file.

        BUG FIX: pandas reads missing scores as NaN, not None, so the
        downstream `is None` check never fired for unplayed games.
        Missing scores are now normalized to None so the rest of the
        pipeline can rely on a single "no result" sentinel.
        """
        df = pd.read_csv(filepath)
        for _, row in df.iterrows():
            self.bowl_games.append(BowlGame(
                bowl_name=row['bowl_name'],
                season=row['season'],
                date=pd.to_datetime(row['date']),
                team1=row['team1'],
                team2=row['team2'],
                team1_score=None if pd.isna(row['team1_score']) else int(row['team1_score']),
                team2_score=None if pd.isna(row['team2_score']) else int(row['team2_score'])
            ))

    def create_training_data(self) -> pd.DataFrame:
        """Create feature matrix from historical data.

        Returns
        -------
        pd.DataFrame
            One row per completed bowl game with stats available for
            both teams. Games missing a result (None or NaN score on
            either side) or missing team stats are skipped.
        """
        rows = []
        for game in self.bowl_games:
            # Skip games without results. pd.isna covers both None and
            # NaN; comparing NaN scores would silently yield a bogus
            # 0/1 target instead of skipping the game.
            if pd.isna(game.team1_score) or pd.isna(game.team2_score):
                continue
            team1_stats = self.team_stats.get((game.team1, game.season))
            team2_stats = self.team_stats.get((game.team2, game.season))
            if team1_stats is None or team2_stats is None:
                continue
            # Create features
            features = {
                'season': game.season,
                'bowl_name': game.bowl_name,
                'team1': game.team1,
                'team2': game.team2,
                # Team 1 raw stats
                'team1_wins': team1_stats.wins,
                'team1_off_epa': team1_stats.offensive_epa,
                'team1_def_epa': team1_stats.defensive_epa,
                'team1_sp_plus': team1_stats.sp_plus_rating,
                'team1_sos': team1_stats.strength_of_schedule,
                # Team 2 raw stats
                'team2_wins': team2_stats.wins,
                'team2_off_epa': team2_stats.offensive_epa,
                'team2_def_epa': team2_stats.defensive_epa,
                'team2_sp_plus': team2_stats.sp_plus_rating,
                'team2_sos': team2_stats.strength_of_schedule,
                # Differential features (team1 minus team2)
                'win_diff': team1_stats.wins - team2_stats.wins,
                'sp_plus_diff': team1_stats.sp_plus_rating - team2_stats.sp_plus_rating,
                'off_epa_diff': team1_stats.offensive_epa - team2_stats.offensive_epa,
                'def_epa_diff': team1_stats.defensive_epa - team2_stats.defensive_epa,
                # Target: Team 1 won
                'team1_won': int(game.team1_score > game.team2_score),
                'margin': game.team1_score - game.team2_score
            }
            rows.append(features)
        return pd.DataFrame(rows)
def generate_sample_data(n_seasons: int = 5,
                         games_per_season: int = 40) -> BowlDataPipeline:
    """Generate synthetic team stats and bowl results for demonstration.

    Parameters
    ----------
    n_seasons : int
        Number of seasons to simulate, starting at 2019.
    games_per_season : int
        Nominal cap, kept for interface compatibility; the actual number
        of games per season is limited by the team pool (len(teams) // 2).

    Returns
    -------
    BowlDataPipeline
        Pipeline pre-populated with team stats and completed bowl games.
    """
    np.random.seed(42)
    pipeline = BowlDataPipeline()
    teams = ['Alabama', 'Ohio State', 'Georgia', 'Michigan', 'Texas',
             'Oregon', 'Penn State', 'Clemson', 'Florida State', 'USC',
             'LSU', 'Notre Dame', 'Oklahoma', 'Tennessee', 'Ole Miss']
    # Generate team stats
    for season in range(2019, 2019 + n_seasons):
        for team in teams:
            # BUG FIX: losses was previously drawn from a second,
            # independent randint, so wins + losses did not describe a
            # consistent 12-game regular season. Draw wins once and
            # derive losses from it.
            wins = int(np.random.randint(6, 13))
            pipeline.team_stats[(team, season)] = TeamSeason(
                team=team,
                season=season,
                conference='Power 5',
                wins=wins,
                losses=12 - wins,
                offensive_epa=np.random.uniform(-0.1, 0.3),
                defensive_epa=np.random.uniform(-0.25, 0.05),
                sp_plus_rating=np.random.uniform(-5, 25),
                strength_of_schedule=np.random.uniform(-2, 2),
                turnover_margin=np.random.uniform(-1.5, 1.5),
                third_down_pct=np.random.uniform(0.35, 0.50),
                red_zone_pct=np.random.uniform(0.75, 0.95)
            )
    # Generate bowl games by pairing randomly shuffled teams
    for season in range(2019, 2019 + n_seasons):
        shuffled_teams = teams.copy()
        np.random.shuffle(shuffled_teams)
        for i in range(0, len(shuffled_teams) - 1, 2):
            team1, team2 = shuffled_teams[i], shuffled_teams[i+1]
            team1_stats = pipeline.team_stats[(team1, season)]
            team2_stats = pipeline.team_stats[(team2, season)]
            # Simulate outcome: win probability is a logistic function
            # of the SP+ rating difference (scale 10).
            sp_diff = team1_stats.sp_plus_rating - team2_stats.sp_plus_rating
            team1_prob = 1 / (1 + np.exp(-sp_diff / 10))
            team1_won = np.random.random() < team1_prob
            margin = int(np.abs(np.random.normal(sp_diff, 14)))
            # Split the margin around a nominal 28-28 game
            if team1_won:
                team1_score = 28 + margin // 2
                team2_score = 28 - margin // 2
            else:
                team1_score = 28 - margin // 2
                team2_score = 28 + margin // 2
            pipeline.bowl_games.append(BowlGame(
                bowl_name=f'Bowl {i//2 + 1}',
                season=season,
                date=datetime(season, 12, 28),
                team1=team1,
                team2=team2,
                team1_score=max(0, team1_score),  # clamp: large margins could go negative
                team2_score=max(0, team2_score)
            ))
    return pipeline
Part 2: Feature Engineering
"""
Bowl Game Prediction System
Part 2: Feature Engineering
"""
import pandas as pd
import numpy as np
from typing import List, Dict
class BowlFeatureEngineer:
    """Feature engineering for bowl game prediction.

    Adds derived features on top of the differential columns produced by
    the data pipeline and records which columns feed the model.
    """

    def __init__(self):
        # Populated by create_base_features; consumed by get_feature_matrix.
        self.feature_columns = []

    def create_base_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Create base features from raw statistics."""
        out = df.copy()
        # Differential features already produced by the pipeline.
        diff_cols = ['win_diff', 'sp_plus_diff', 'off_epa_diff', 'def_epa_diff']
        # Combined efficiency differential: offense edge minus defense
        # deficit for team 1.
        out['total_efficiency_diff'] = out['off_epa_diff'] - out['def_epa_diff']
        # Win differential adjusted by strength-of-schedule difference.
        out['adj_win_diff'] = out['win_diff'] + (out['team1_sos'] - out['team2_sos'])
        self.feature_columns = [*diff_cols, 'total_efficiency_diff', 'adj_win_diff']
        return out

    def create_bowl_specific_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Create bowl-specific features (name-based tier, simplified)."""
        out = df.copy()

        def tier_of(name):
            # Tier 1: playoff/title games; tier 2: marquee bowls; tier 3: rest.
            lowered = name.lower()
            if 'playoff' in lowered or 'championship' in lowered:
                return 1
            if 'rose' in lowered or 'orange' in lowered:
                return 2
            return 3

        out['bowl_tier'] = out['bowl_name'].apply(tier_of)
        return out

    def create_conference_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Create conference matchup features (placeholder, no-op)."""
        # A real implementation would encode conference strength
        # differentials; for now the frame passes through unchanged.
        return df.copy()

    def prepare_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run the full feature engineering pipeline in order."""
        for step in (self.create_base_features,
                     self.create_bowl_specific_features,
                     self.create_conference_features):
            df = step(df)
        return df

    def get_feature_matrix(self, df: pd.DataFrame) -> pd.DataFrame:
        """Extract only the model input columns, in training order."""
        return df[self.feature_columns]
Part 3: Model Training and Evaluation
"""
Bowl Game Prediction System
Part 3: Model Training and Evaluation
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, brier_score_loss, log_loss
from typing import Dict, List, Tuple
class BowlPredictionModel:
    """Machine learning model for bowl game predictions.

    Wraps a scikit-learn classifier with feature scaling, optional
    probability calibration, and time-series cross-validation.
    """

    def __init__(self, model_type: str = 'logistic'):
        self.model_type = model_type
        self.model = None
        self.scaler = StandardScaler()
        self.is_fitted = False
        self.training_metrics = {}

    def _get_base_model(self):
        """Return a fresh estimator for the configured model type.

        Raises
        ------
        ValueError
            If ``model_type`` is not a supported name.
        """
        if self.model_type == 'logistic':
            return LogisticRegression(max_iter=1000, random_state=42)
        elif self.model_type == 'random_forest':
            return RandomForestClassifier(n_estimators=100, random_state=42)
        elif self.model_type == 'gradient_boost':
            return GradientBoostingClassifier(n_estimators=100, random_state=42)
        else:
            raise ValueError(f"Unknown model type: {self.model_type}")

    def train(self, X: pd.DataFrame, y: pd.Series,
              calibrate: bool = True) -> Dict:
        """
        Train the prediction model.

        Parameters
        ----------
        X : pd.DataFrame
            Feature matrix. NOTE(review): rows are assumed to be in
            chronological order, since TimeSeriesSplit splits by
            position — confirm at the call site.
        y : pd.Series
            Target (team1 won).
        calibrate : bool
            Whether to calibrate probabilities with CalibratedClassifierCV.

        Returns
        -------
        dict : Training results (CV accuracy mean/std, fold scores,
        sample and feature counts).
        """
        # Cross-validation.
        # BUG FIX: the scaler was previously fit on the FULL data set
        # before cross-validation, leaking test-fold statistics into the
        # training folds and inflating CV scores. Fit a fresh scaler
        # inside each fold instead.
        tscv = TimeSeriesSplit(n_splits=5)
        fold_accuracies = []
        for train_idx, test_idx in tscv.split(X):
            fold_scaler = StandardScaler()
            X_train = fold_scaler.fit_transform(X.iloc[train_idx])
            X_test = fold_scaler.transform(X.iloc[test_idx])
            fold_model = self._get_base_model()
            fold_model.fit(X_train, y.iloc[train_idx])
            fold_accuracies.append(fold_model.score(X_test, y.iloc[test_idx]))
        cv_scores = np.array(fold_accuracies)

        # Fit the final model (and scaler) on all available data.
        X_scaled = self.scaler.fit_transform(X)
        base_model = self._get_base_model()
        if calibrate:
            # Internal 5-fold CV keeps the calibration honest.
            self.model = CalibratedClassifierCV(base_model, cv=5)
        else:
            self.model = base_model
        self.model.fit(X_scaled, y)
        self.is_fitted = True

        self.training_metrics = {
            'cv_accuracy_mean': cv_scores.mean(),
            'cv_accuracy_std': cv_scores.std(),
            'cv_scores': cv_scores.tolist(),
            'n_samples': len(y),
            'n_features': X.shape[1]
        }
        return self.training_metrics

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """Predict class labels (1 = team1 wins).

        Raises
        ------
        ValueError
            If called before ``train``.
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted")
        X_scaled = self.scaler.transform(X)
        return self.model.predict(X_scaled)

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        """Predict class probabilities, shape (n, 2): [P(team2), P(team1)].

        Raises
        ------
        ValueError
            If called before ``train``.
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted")
        X_scaled = self.scaler.transform(X)
        return self.model.predict_proba(X_scaled)

    def evaluate(self, X: pd.DataFrame, y: pd.Series) -> Dict:
        """Evaluate the fitted model on held-out data.

        Returns accuracy plus probability-quality metrics (Brier score
        and log loss), along with raw correct/total counts.
        """
        y_pred = self.predict(X)
        y_prob = self.predict_proba(X)[:, 1]
        return {
            'accuracy': accuracy_score(y, y_pred),
            'brier_score': brier_score_loss(y, y_prob),
            'log_loss': log_loss(y, y_prob),
            'n_correct': (y == y_pred).sum(),
            'n_total': len(y)
        }
class ModelEvaluator:
    """Comprehensive model evaluation utilities."""

    @staticmethod
    def compare_models(X: pd.DataFrame, y: pd.Series,
                       models: Dict[str, str]) -> pd.DataFrame:
        """Train each candidate model and rank them by CV accuracy.

        ``models`` maps a display name to a BowlPredictionModel
        model_type string.
        """
        leaderboard = []
        for label, kind in models.items():
            candidate = BowlPredictionModel(model_type=kind)
            metrics = candidate.train(X, y)
            leaderboard.append({
                'model': label,
                'cv_accuracy': metrics['cv_accuracy_mean'],
                'cv_std': metrics['cv_accuracy_std'],
            })
        return pd.DataFrame(leaderboard).sort_values('cv_accuracy', ascending=False)

    @staticmethod
    def evaluate_calibration(y_true: np.ndarray,
                             y_prob: np.ndarray,
                             n_bins: int = 10) -> Dict:
        """Evaluate probability calibration.

        Returns the expected calibration error (ECE), Brier score, and
        the reliability-curve points.
        """
        from sklearn.calibration import calibration_curve

        prob_true, prob_pred = calibration_curve(y_true, y_prob, n_bins=n_bins)
        # Assign each prediction to one of n_bins equal-width probability
        # bins, then accumulate the weighted |accuracy - confidence| gap.
        interior_edges = np.linspace(0, 1, n_bins + 1)[1:-1]
        bin_indices = np.digitize(y_prob, interior_edges)
        ece = 0
        for b in range(n_bins):
            in_bin = bin_indices == b
            count = in_bin.sum()
            if count > 0:
                gap = abs(y_true[in_bin].mean() - y_prob[in_bin].mean())
                ece += (count / len(y_prob)) * gap
        return {
            'ece': ece,
            'brier': brier_score_loss(y_true, y_prob),
            'calibration_curve': (prob_true.tolist(), prob_pred.tolist())
        }
Part 4: Prediction System
"""
Bowl Game Prediction System
Part 4: Complete Prediction System
"""
import pandas as pd
import numpy as np
from dataclasses import dataclass
from typing import Dict, List, Optional
@dataclass
class BowlPrediction:
    """Single bowl game prediction, as produced by BowlPredictionSystem."""
    bowl_name: str
    team1: str
    team2: str
    team1_win_prob: float       # P(team1 wins), rounded to 3 decimals
    team2_win_prob: float       # P(team2 wins), rounded to 3 decimals
    predicted_winner: str       # team name whose win probability exceeds 0.5
    confidence: str             # 'High' (>=0.70), 'Medium' (>=0.55), or 'Low'
    key_factors: List[str]      # human-readable drivers of the prediction
class BowlPredictionSystem:
    """Complete system for bowl game predictions.

    Ties together the data pipeline, feature engineering, and trained
    model, and exposes single-game prediction plus batch reporting.
    """

    def __init__(self):
        self.pipeline = None          # BowlDataPipeline, set by train()
        self.feature_engineer = None  # BowlFeatureEngineer, set by train()
        self.model = None             # BowlPredictionModel, set by train()
        self.feature_columns = None   # column order the model expects

    def train(self, pipeline: 'BowlDataPipeline') -> Dict:
        """Train the prediction system on historical data.

        Parameters
        ----------
        pipeline : BowlDataPipeline
            Pipeline with team stats and completed bowl games loaded.

        Returns
        -------
        dict
            Training metrics from the underlying model.
        """
        self.pipeline = pipeline
        self.feature_engineer = BowlFeatureEngineer()
        # Create training data and derive model features
        df = pipeline.create_training_data()
        df = self.feature_engineer.prepare_features(df)
        # Get features and target
        self.feature_columns = self.feature_engineer.feature_columns
        X = df[self.feature_columns]
        y = df['team1_won']
        # Train a calibrated logistic regression model
        self.model = BowlPredictionModel(model_type='logistic')
        training_metrics = self.model.train(X, y, calibrate=True)
        return training_metrics

    def predict_game(self, team1_stats: 'TeamSeason',
                     team2_stats: 'TeamSeason',
                     bowl_name: str = 'Bowl') -> 'BowlPrediction':
        """
        Predict a single bowl game.

        Parameters
        ----------
        team1_stats : TeamSeason
            Statistics for team 1
        team2_stats : TeamSeason
            Statistics for team 2
        bowl_name : str
            Name of the bowl game

        Returns
        -------
        BowlPrediction : Complete prediction
        """
        # Mirror the differential features built by BowlFeatureEngineer
        # during training; the dicts must stay in sync.
        features = {
            'win_diff': team1_stats.wins - team2_stats.wins,
            'sp_plus_diff': team1_stats.sp_plus_rating - team2_stats.sp_plus_rating,
            'off_epa_diff': team1_stats.offensive_epa - team2_stats.offensive_epa,
            'def_epa_diff': team1_stats.defensive_epa - team2_stats.defensive_epa,
            'total_efficiency_diff': (
                (team1_stats.offensive_epa - team2_stats.offensive_epa) -
                (team1_stats.defensive_epa - team2_stats.defensive_epa)
            ),
            'adj_win_diff': (
                (team1_stats.wins - team2_stats.wins) +
                (team1_stats.strength_of_schedule - team2_stats.strength_of_schedule)
            )
        }
        X = pd.DataFrame([features])[self.feature_columns]
        # predict_proba returns [P(class 0), P(class 1)]; class 1 is "team1 won"
        prob = self.model.predict_proba(X)[0]
        team1_prob = prob[1]
        team2_prob = prob[0]
        # Bucket the stronger probability into a confidence label
        max_prob = max(team1_prob, team2_prob)
        if max_prob >= 0.70:
            confidence = 'High'
        elif max_prob >= 0.55:
            confidence = 'Medium'
        else:
            confidence = 'Low'
        key_factors = self._identify_key_factors(features)
        predicted_winner = team1_stats.team if team1_prob > 0.5 else team2_stats.team
        return BowlPrediction(
            bowl_name=bowl_name,
            team1=team1_stats.team,
            team2=team2_stats.team,
            team1_win_prob=round(team1_prob, 3),
            team2_win_prob=round(team2_prob, 3),
            predicted_winner=predicted_winner,
            confidence=confidence,
            key_factors=key_factors
        )

    def _identify_key_factors(self, features: Dict) -> List[str]:
        """Translate large feature differentials into plain-language factors.

        Thresholds (10 SP+ points, 0.15 EPA, 3 wins) are heuristic
        cutoffs for a "notable" advantage.
        """
        factors = []
        if abs(features['sp_plus_diff']) > 10:
            if features['sp_plus_diff'] > 0:
                factors.append("Significant SP+ rating advantage")
            else:
                factors.append("Significant SP+ rating disadvantage")
        if abs(features['off_epa_diff']) > 0.15:
            if features['off_epa_diff'] > 0:
                factors.append("Superior offensive efficiency")
            else:
                factors.append("Inferior offensive efficiency")
        if abs(features['win_diff']) >= 3:
            if features['win_diff'] > 0:
                factors.append("More regular season wins")
            else:
                factors.append("Fewer regular season wins")
        if not factors:
            factors.append("Evenly matched teams")
        return factors

    def generate_predictions_report(self,
                                    upcoming_games: List[tuple]) -> pd.DataFrame:
        """
        Generate predictions for multiple games.

        Parameters
        ----------
        upcoming_games : list
            List of (bowl_name, team1, team2) tuples

        Returns
        -------
        pd.DataFrame : Predictions for all games. Games for which either
        team has no stats are silently omitted.
        """
        predictions = []
        for bowl_name, team1, team2 in upcoming_games:
            # Get team stats (using most recent season)
            team1_stats = self._get_team_stats(team1)
            team2_stats = self._get_team_stats(team2)
            if team1_stats and team2_stats:
                pred = self.predict_game(team1_stats, team2_stats, bowl_name)
                predictions.append({
                    'bowl': pred.bowl_name,
                    'team1': pred.team1,
                    'team2': pred.team2,
                    'predicted_winner': pred.predicted_winner,
                    'win_probability': max(pred.team1_win_prob, pred.team2_win_prob),
                    'confidence': pred.confidence,
                    'key_factors': '; '.join(pred.key_factors)
                })
        return pd.DataFrame(predictions)

    def _get_team_stats(self, team: str) -> Optional['TeamSeason']:
        """Get the most recent season's stats for a team, or None.

        BUG FIX: previously returned the FIRST matching entry in dict
        iteration order (i.e. load order), not the most recent season as
        documented. Now selects the entry with the highest season.
        """
        best = None
        best_season = None
        for (name, season), stats in self.pipeline.team_stats.items():
            if name == team and (best_season is None or season > best_season):
                best_season = season
                best = stats
        return best
# =============================================================================
# DEMONSTRATION
# =============================================================================
if __name__ == "__main__":
    separator = "=" * 70
    print(separator)
    print("BOWL GAME PREDICTION SYSTEM")
    print(separator)

    # Step 1: synthetic data stands in for real historical feeds.
    print("\n1. Generating sample data...")
    pipeline = generate_sample_data(n_seasons=5)
    print(f" Loaded {len(pipeline.team_stats)} team-seasons")
    print(f" Loaded {len(pipeline.bowl_games)} bowl games")

    # Step 2: fit the end-to-end system and report CV accuracy.
    print("\n2. Training prediction system...")
    system = BowlPredictionSystem()
    metrics = system.train(pipeline)
    print(f" CV Accuracy: {metrics['cv_accuracy_mean']:.3f} (+/- {metrics['cv_accuracy_std']:.3f})")

    # Step 3: predict a few marquee matchups and print the report.
    print("\n3. Generating sample predictions...")
    upcoming = [
        ('Sugar Bowl', 'Alabama', 'Georgia'),
        ('Rose Bowl', 'Michigan', 'Oregon'),
        ('Orange Bowl', 'Florida State', 'Clemson'),
    ]
    report = system.generate_predictions_report(upcoming)
    print("\nPredictions:")
    print(report.to_string(index=False))

    print("\n" + separator)
    print("DEMONSTRATION COMPLETE")
    print(separator)
Key Insights
Model Performance
-
Accuracy vs. Calibration: Raw accuracy matters less than calibration for probability predictions. A well-calibrated model with 60% accuracy is more useful than a poorly calibrated model with 65% accuracy.
-
Feature Importance: SP+ rating differential was the strongest predictor, followed by efficiency metrics. Simple win counts were less predictive after controlling for strength of schedule.
-
Temporal Considerations: Model performance degrades when predicting further in advance. Predictions made closer to game time (with more complete information) are more accurate.
Practical Lessons
-
Keep It Simple: Complex ensemble models only marginally outperformed logistic regression while being harder to explain and maintain.
-
Calibrate Probabilities: Using CalibratedClassifierCV significantly improved the reliability of probability estimates.
-
Update Predictions: Building infrastructure to update predictions as new information (opt-outs, injuries) becomes available is as important as the initial model.
Exercises
-
Add opt-out adjustment: Modify the system to account for player opt-outs.
-
Historical accuracy tracking: Implement a function to track prediction accuracy over multiple seasons.
-
Confidence intervals: Add bootstrap confidence intervals to probability predictions.
Further Reading
- Bill Connelly's SP+ methodology documentation
- ESPN's FPI technical documentation
- Research on bowl game prediction in sports analytics conferences