Chapter 22: Exercises - Machine Learning Applications

Exercise Overview

These exercises progress from basic ML implementation to advanced ensemble and deep learning techniques for football analytics.


Level 1: Foundational Exercises

Exercise 1.1: Basic Classification Pipeline

Build a simple classification pipeline to predict game outcomes.

Task: Implement a logistic regression classifier with proper preprocessing.

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score

# Sample game data
np.random.seed(42)
n_games = 500

games = pd.DataFrame({
    'home_elo': np.random.normal(1500, 200, n_games),
    'away_elo': np.random.normal(1500, 200, n_games),
    'home_off_efficiency': np.random.normal(30, 5, n_games),
    'away_off_efficiency': np.random.normal(30, 5, n_games),
    'home_def_efficiency': np.random.normal(25, 5, n_games),
    'away_def_efficiency': np.random.normal(25, 5, n_games)
})

# Create target (home team wins)
elo_diff = games['home_elo'] - games['away_elo']
eff_diff = (games['home_off_efficiency'] - games['away_def_efficiency']) - \
           (games['away_off_efficiency'] - games['home_def_efficiency'])
win_prob = 1 / (1 + np.exp(-(elo_diff/400 + eff_diff/10 + 0.1)))  # logistic link; the +0.1 term adds a small home-field edge
games['home_win'] = (np.random.random(n_games) < win_prob).astype(int)

# TODO: Create feature matrix
# TODO: Split into train/test sets
# TODO: Scale features
# TODO: Train logistic regression
# TODO: Evaluate accuracy and AUC

Expected Output:

Training samples: 400
Test samples: 100
Test Accuracy: ~65%
Test AUC: ~0.70
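If you want a starting point before turning to the solutions file, the sketch below is one reasonable way to complete the TODOs. It assumes the six raw columns are used directly as features; the variable names are illustrative, not required.

# Hedged sketch: one possible completion of Exercise 1.1
feature_cols = ['home_elo', 'away_elo',
                'home_off_efficiency', 'away_off_efficiency',
                'home_def_efficiency', 'away_def_efficiency']
X = games[feature_cols]
y = games['home_win']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  # fit the scaler on training data only
X_test_scaled = scaler.transform(X_test)

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_scaled, y_train)

y_pred = clf.predict(X_test_scaled)
y_prob = clf.predict_proba(X_test_scaled)[:, 1]
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Test Accuracy: {accuracy_score(y_test, y_pred):.1%}")
print(f"Test AUC: {roc_auc_score(y_test, y_prob):.3f}")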

Exercise 1.2: Feature Engineering

Engineer relevant features from raw game data.

Task: Create meaningful features for game prediction.

def engineer_game_features(games: pd.DataFrame) -> pd.DataFrame:
    """
    Engineer features for game outcome prediction.

    Create the following features:
    1. elo_diff: Home - Away Elo
    2. off_advantage: Home offense vs Away defense
    3. def_advantage: Home defense vs Away offense
    4. total_elo: Sum of both teams' Elo ratings
    5. elo_ratio: Home Elo / Away Elo
    """
    df = games.copy()

    # TODO: Implement feature engineering
    # df['elo_diff'] = ...
    # df['off_advantage'] = ...
    # df['def_advantage'] = ...
    # df['total_elo'] = ...
    # df['elo_ratio'] = ...

    return df

# Test your implementation
features = engineer_game_features(games)
print(features[['elo_diff', 'off_advantage', 'def_advantage']].describe())
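Hint: each feature is a simple arithmetic combination of existing columns. One possible set of definitions is sketched below as a standalone helper; the sign convention for def_advantage is a judgment call, and the sketch_ name is illustrative only.

# Hedged sketch of one possible implementation
def sketch_engineer_game_features(games: pd.DataFrame) -> pd.DataFrame:
    df = games.copy()
    df['elo_diff'] = df['home_elo'] - df['away_elo']
    df['off_advantage'] = df['home_off_efficiency'] - df['away_def_efficiency']
    df['def_advantage'] = df['away_off_efficiency'] - df['home_def_efficiency']
    df['total_elo'] = df['home_elo'] + df['away_elo']
    df['elo_ratio'] = df['home_elo'] / df['away_elo']
    return df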

Exercise 1.3: Model Comparison

Compare multiple classification algorithms.

Task: Train and evaluate logistic regression, random forest, and gradient boosting.

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import brier_score_loss

def compare_classifiers(X_train, X_test, y_train, y_test):
    """
    Train and compare three classifiers.

    Returns DataFrame with columns:
    - model
    - accuracy
    - auc
    - brier_score
    """
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
        'Gradient Boosting': GradientBoostingClassifier(n_estimators=100, random_state=42)
    }

    results = []
    # TODO: Train each model and collect metrics

    return pd.DataFrame(results)

# Use the engineered features
# results = compare_classifiers(X_train, X_test, y_train, y_test)
# print(results.sort_values('brier_score'))
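One way to fill in the loop is to factor out a small helper that fits a single model and returns one row of the results table; compare_classifiers can then call it for each entry in models. The helper below is a hedged sketch (its name is illustrative) and relies on the metric imports from Exercises 1.1 and 1.3.

# Hedged sketch: score one model, return one row of the comparison table
def sketch_score_model(name, model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    prob = model.predict_proba(X_test)[:, 1]
    return {
        'model': name,
        'accuracy': accuracy_score(y_test, model.predict(X_test)),
        'auc': roc_auc_score(y_test, prob),
        'brier_score': brier_score_loss(y_test, prob)
    }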

Level 2: Intermediate Exercises

Exercise 2.1: Temporal Cross-Validation

Implement proper time-based validation for sports data.

Task: Create temporal train/test splits and evaluate model performance over time.

class TemporalValidator:
    """Implement time-based cross-validation."""

    def __init__(self, n_splits: int = 5):
        self.n_splits = n_splits

    def split(self, df: pd.DataFrame, season_col: str = 'season'):
        """
        Generate temporal train/test indices.

        Args:
            df: DataFrame with season column
            season_col: Name of season column

        Yields:
            Tuple of (train_indices, test_indices)
        """
        seasons = sorted(df[season_col].unique())
        # TODO: Implement temporal splitting
        pass

    def cross_val_score(self, model, df, features, target, season_col='season'):
        """
        Perform temporal cross-validation.

        Returns list of scores for each fold.
        """
        scores = []
        # TODO: Implement cross-validation loop
        return scores

# Test with synthetic data
np.random.seed(42)
seasons = list(range(2018, 2024))
games_with_season = games.copy()
games_with_season['season'] = np.random.choice(seasons, len(games))

validator = TemporalValidator(n_splits=3)
# scores = validator.cross_val_score(...)
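A common design is an expanding-window split by season: train on every season before a cutoff, test on the cutoff season itself. The module-level helpers below sketch that idea (using the last n_splits seasons as successive test sets, and AUC as the fold score, are assumptions); adapting them into the class methods is the exercise.

# Hedged sketch: expanding-window temporal splits and a CV loop
def sketch_temporal_splits(df, season_col='season', n_splits=3):
    seasons = sorted(df[season_col].unique())
    for test_season in seasons[-n_splits:]:
        yield (df.index[df[season_col] < test_season],
               df.index[df[season_col] == test_season])

def sketch_temporal_cv_auc(model, df, features, target, season_col='season', n_splits=3):
    scores = []
    for train_idx, test_idx in sketch_temporal_splits(df, season_col, n_splits):
        model.fit(df.loc[train_idx, features], df.loc[train_idx, target])
        prob = model.predict_proba(df.loc[test_idx, features])[:, 1]
        scores.append(roc_auc_score(df.loc[test_idx, target], prob))  # AUC per fold
    return scores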

Exercise 2.2: XGBoost Hyperparameter Tuning

Optimize XGBoost hyperparameters for game prediction.

Task: Use RandomizedSearchCV to find optimal parameters.

import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

def tune_xgboost(X_train, y_train, n_iter=20):
    """
    Tune XGBoost hyperparameters.

    Search space:
    - n_estimators: [50, 100, 200, 300]
    - max_depth: [3, 4, 5, 6, 7]
    - learning_rate: [0.01, 0.05, 0.1, 0.2]
    - subsample: [0.7, 0.8, 0.9, 1.0]
    - colsample_bytree: [0.7, 0.8, 0.9, 1.0]

    Returns:
    - Best parameters
    - Best score
    - Fitted best estimator
    """
    param_dist = {
        # TODO: Define parameter distribution
    }

    base_model = xgb.XGBClassifier(
        objective='binary:logistic',
        random_state=42,
        eval_metric='logloss'  # use_label_encoder is deprecated in recent XGBoost and not needed here
    )

    # TODO: Implement RandomizedSearchCV
    # TODO: Return results

    pass

# best_params, best_score, best_model = tune_xgboost(X_train, y_train)
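For reference, one way to wire up the search is sketched below. The parameter grid comes straight from the docstring; using 'neg_brier_score' as the search metric (rather than accuracy or AUC) is an assumption you can revisit.

# Hedged sketch of the tuning function
def sketch_tune_xgboost(X_train, y_train, n_iter=20):
    param_dist = {
        'n_estimators': [50, 100, 200, 300],
        'max_depth': [3, 4, 5, 6, 7],
        'learning_rate': [0.01, 0.05, 0.1, 0.2],
        'subsample': [0.7, 0.8, 0.9, 1.0],
        'colsample_bytree': [0.7, 0.8, 0.9, 1.0]
    }
    base_model = xgb.XGBClassifier(objective='binary:logistic',
                                   random_state=42, eval_metric='logloss')
    search = RandomizedSearchCV(base_model, param_distributions=param_dist,
                                n_iter=n_iter, scoring='neg_brier_score',
                                cv=5, random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    return search.best_params_, search.best_score_, search.best_estimator_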

Exercise 2.3: Calibration Analysis

Analyze and improve model calibration.

Task: Build calibration curves and apply isotonic regression calibration.

from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve

def analyze_calibration(y_true, y_prob, n_bins=10):
    """
    Analyze model calibration.

    Returns:
    - Calibration DataFrame (bin, predicted, actual, count)
    - Expected Calibration Error (ECE)
    - Maximum Calibration Error (MCE)
    """
    # TODO: Implement calibration analysis
    pass

def calibrate_predictions(y_prob_train, y_train, y_prob_test):
    """
    Apply isotonic regression calibration.

    Returns calibrated probabilities for test set.
    """
    # TODO: Fit isotonic regression on training predictions
    # TODO: Transform test predictions
    pass

# Example usage
# Train a model and get predictions
# model = GradientBoostingClassifier(n_estimators=100)
# model.fit(X_train, y_train)
# y_prob_train = model.predict_proba(X_train)[:, 1]
# y_prob_test = model.predict_proba(X_test)[:, 1]

# Analyze calibration
# cal_df, ece, mce = analyze_calibration(y_test, y_prob_test)
# print(f"ECE: {ece:.4f}")

# Apply calibration
# y_prob_calibrated = calibrate_predictions(y_prob_train, y_train, y_prob_test)
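For reference, ECE is the sample-weighted average gap between mean predicted probability and observed win rate across probability bins, and isotonic calibration is a monotone remapping fit on held-out predictions. Both pieces are sketched below; the helper names are illustrative.

# Hedged sketch: ECE computation and isotonic calibration
def sketch_expected_calibration_error(y_true, y_prob, n_bins=10):
    y_true, y_prob = np.asarray(y_true), np.asarray(y_prob)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    bin_ids = np.clip(np.digitize(y_prob, edges) - 1, 0, n_bins - 1)
    ece = 0.0
    for b in range(n_bins):
        mask = bin_ids == b
        if mask.any():
            gap = abs(y_prob[mask].mean() - y_true[mask].mean())
            ece += mask.mean() * gap  # weight each bin by its share of samples
    return ece

def sketch_calibrate(y_prob_train, y_train, y_prob_test):
    iso = IsotonicRegression(out_of_bounds='clip')
    iso.fit(y_prob_train, y_train)       # learn the monotone remapping on training predictions
    return iso.predict(y_prob_test)      # apply it to test predictions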

Level 3: Advanced Exercises

Exercise 3.1: Custom Ensemble

Build a custom weighted ensemble based on validation performance.

Task: Implement an ensemble that weights models by their Brier score on a validation set.

class WeightedEnsemble:
    """
    Custom ensemble that weights models by validation performance.
    """

    def __init__(self, models: dict):
        """
        Args:
            models: Dict of {name: fitted_model}
        """
        self.models = models
        self.weights = None

    def fit_weights(self, X_val, y_val, metric='brier_score'):
        """
        Fit ensemble weights based on validation performance.

        Models with better performance get higher weights.
        Weights sum to 1.
        """
        # TODO: Calculate performance for each model
        # TODO: Convert to weights (better = higher weight)
        # TODO: Normalize to sum to 1
        pass

    def predict_proba(self, X):
        """
        Get weighted probability predictions.
        """
        # TODO: Get predictions from each model
        # TODO: Weight by model weights
        # TODO: Return weighted average
        pass

    def evaluate(self, X_test, y_test):
        """
        Evaluate ensemble performance.
        """
        # TODO: Get ensemble predictions
        # TODO: Calculate metrics
        pass

# Test implementation
# models = {
#     'logistic': fitted_logistic,
#     'rf': fitted_rf,
#     'gbm': fitted_gbm
# }
# ensemble = WeightedEnsemble(models)
# ensemble.fit_weights(X_val, y_val)
# results = ensemble.evaluate(X_test, y_test)
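One simple weighting scheme is inverse Brier score, normalized to sum to one; a softmax over negative Brier scores is another reasonable choice. The standalone helper below sketches the idea behind fit_weights (the function name is illustrative).

# Hedged sketch: inverse-Brier weights for a dict of fitted models
from sklearn.metrics import brier_score_loss

def sketch_inverse_brier_weights(models, X_val, y_val):
    briers = {name: brier_score_loss(y_val, m.predict_proba(X_val)[:, 1])
              for name, m in models.items()}
    inv = {name: 1.0 / b for name, b in briers.items()}  # lower Brier -> larger weight
    total = sum(inv.values())
    return {name: w / total for name, w in inv.items()}

# predict_proba can then return a weighted average:
# np.sum([weights[name] * m.predict_proba(X)[:, 1] for name, m in models.items()], axis=0)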

Exercise 3.2: Player Clustering System

Build a complete player clustering and archetype discovery system.

Task: Cluster quarterbacks based on playing style.

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

class QBArchetypeDiscovery:
    """Discover QB playing style archetypes."""

    def __init__(self, n_clusters: int = 5):
        self.n_clusters = n_clusters
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=2)
        self.kmeans = None

    def prepare_features(self, qbs: pd.DataFrame) -> np.ndarray:
        """
        Prepare QB features for clustering.

        Features to include:
        - completion_pct
        - yards_per_attempt
        - td_rate
        - int_rate
        - rush_yards_per_game
        - sack_rate
        """
        # TODO: Select and scale features
        pass

    def find_optimal_clusters(self, X: np.ndarray, max_k: int = 10):
        """
        Find the optimal number of clusters using the elbow method and silhouette score.
        """
        # TODO: Test different k values
        # TODO: Calculate inertia and silhouette scores
        # TODO: Return recommended k
        pass

    def fit_clusters(self, X: np.ndarray):
        """
        Fit KMeans clustering.
        """
        # TODO: Fit KMeans
        # TODO: Return cluster labels
        pass

    def name_clusters(self, qbs: pd.DataFrame, labels: np.ndarray):
        """
        Generate descriptive names for each cluster.
        """
        # TODO: Analyze cluster characteristics
        # TODO: Generate meaningful names
        pass

    def visualize(self, X: np.ndarray, labels: np.ndarray):
        """
        Visualize clusters in 2D using PCA.
        """
        # TODO: Apply PCA
        # TODO: Create scatter plot
        pass

# Sample QB data
np.random.seed(42)
n_qbs = 100
qbs = pd.DataFrame({
    'player_name': [f'QB_{i}' for i in range(n_qbs)],
    'completion_pct': np.random.normal(0.62, 0.05, n_qbs),
    'yards_per_attempt': np.random.normal(7.5, 1.0, n_qbs),
    'td_rate': np.random.normal(0.04, 0.015, n_qbs),
    'int_rate': np.random.normal(0.025, 0.01, n_qbs),
    'rush_yards_per_game': np.random.exponential(20, n_qbs),
    'sack_rate': np.random.normal(0.06, 0.02, n_qbs)
})

# Test your implementation
# archetype_system = QBArchetypeDiscovery(n_clusters=5)
# X = archetype_system.prepare_features(qbs)
# optimal_k = archetype_system.find_optimal_clusters(X)
# labels = archetype_system.fit_clusters(X)
# cluster_names = archetype_system.name_clusters(qbs, labels)
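As a starting point, feature preparation is a select-and-scale step, and the k-selection loop simply records inertia (for the elbow plot) and silhouette score for each candidate k. A hedged sketch, with silhouette_score imported here because it is not used elsewhere in the chapter:

# Hedged sketch: feature preparation and k-selection diagnostics
from sklearn.metrics import silhouette_score

feature_cols = ['completion_pct', 'yards_per_attempt', 'td_rate',
                'int_rate', 'rush_yards_per_game', 'sack_rate']
X = StandardScaler().fit_transform(qbs[feature_cols])

for k in range(2, 11):  # candidate cluster counts up to max_k=10
    km = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = km.fit_predict(X)
    print(f"k={k}: inertia={km.inertia_:.1f}, silhouette={silhouette_score(X, labels):.3f}")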

Exercise 3.3: Neural Network Game Predictor

Implement a neural network for game outcome prediction.

Task: Build and train a feed-forward neural network using PyTorch.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class GamePredictionNN(nn.Module):
    """Neural network for game prediction."""

    def __init__(self, input_size: int, hidden_sizes: list = [64, 32]):
        super().__init__()
        # TODO: Build network architecture
        # Input -> Hidden1 -> BatchNorm -> ReLU -> Dropout
        # -> Hidden2 -> BatchNorm -> ReLU -> Dropout
        # -> Output (sigmoid)
        pass

    def forward(self, x):
        # TODO: Implement forward pass
        pass


class NNTrainer:
    """Training loop for neural network."""

    def __init__(self, model, lr=0.001):
        self.model = model
        self.optimizer = optim.Adam(model.parameters(), lr=lr)
        self.criterion = nn.BCELoss()
        self.history = {'train_loss': [], 'val_loss': []}

    def train_epoch(self, train_loader):
        """Train for one epoch."""
        # TODO: Implement training loop
        pass

    def evaluate(self, val_loader):
        """Evaluate on validation set."""
        # TODO: Implement evaluation
        pass

    def train(self, X_train, y_train, X_val, y_val,
              epochs=100, batch_size=32, early_stopping=10):
        """
        Full training loop with early stopping.
        """
        # TODO: Create data loaders
        # TODO: Training loop with early stopping
        pass

    def predict(self, X):
        """Get predictions."""
        # TODO: Inference mode predictions
        pass

# Test implementation
# input_size = X_train.shape[1]
# model = GamePredictionNN(input_size, hidden_sizes=[64, 32])
# trainer = NNTrainer(model, lr=0.001)
# trainer.train(X_train, y_train, X_val, y_val, epochs=100)
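One plausible architecture, built from the Linear -> BatchNorm -> ReLU -> Dropout pattern described in the TODO, is sketched below as a separate class; the 0.3 dropout rate is an assumption, and the sigmoid output is what lets NNTrainer use nn.BCELoss.

# Hedged sketch of one possible architecture
class SketchGamePredictionNN(nn.Module):
    def __init__(self, input_size, hidden_sizes=(64, 32), dropout=0.3):
        super().__init__()
        layers, prev = [], input_size
        for h in hidden_sizes:
            layers += [nn.Linear(prev, h), nn.BatchNorm1d(h),
                       nn.ReLU(), nn.Dropout(dropout)]
            prev = h
        layers += [nn.Linear(prev, 1), nn.Sigmoid()]  # probability output for BCELoss
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x).squeeze(-1)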

Level 4: Expert Challenges

Exercise 4.1: Stacking Ensemble with Custom Meta-Learner

Build a sophisticated stacking ensemble with neural network meta-learner.

Task: Create multi-level ensemble with diverse base learners.

class AdvancedStackingEnsemble:
    """
    Two-level stacking ensemble:
    Level 1: Diverse base models (logistic, RF, XGB, SVM)
    Level 2: Neural network meta-learner
    """

    def __init__(self, base_models: dict, meta_hidden_sizes: list = [32, 16]):
        self.base_models = base_models
        self.meta_hidden_sizes = meta_hidden_sizes
        self.meta_model = None

    def generate_meta_features(self, X, y=None, fit=True):
        """
        Generate meta-features from base model predictions.

        For training (fit=True): Use 5-fold CV to avoid leakage
        For testing (fit=False): Use base model predictions
        """
        # TODO: Implement meta-feature generation
        pass

    def fit(self, X_train, y_train, X_val, y_val):
        """
        Fit stacking ensemble.

        1. Fit base models on training data
        2. Generate meta-features using CV
        3. Train neural network meta-learner
        """
        # TODO: Implement fitting procedure
        pass

    def predict_proba(self, X):
        """
        Get stacked ensemble predictions.
        """
        # TODO: Get base predictions
        # TODO: Feed to meta-learner
        pass

    def analyze_contribution(self, X_val, y_val):
        """
        Analyze contribution of each base model.
        """
        # TODO: Calculate individual and combined performance
        pass
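The key step is generating training meta-features without leakage: each base model's predictions for a row must come from a fold that did not train on that row. The helper below is a hedged sketch of that step using out-of-fold probabilities; its name is illustrative.

# Hedged sketch: out-of-fold meta-features for the training set
from sklearn.model_selection import cross_val_predict

def sketch_meta_features_train(base_models, X, y, cv=5):
    cols = [cross_val_predict(model, X, y, cv=cv, method='predict_proba')[:, 1]
            for model in base_models.values()]
    return np.column_stack(cols)  # one column of probabilities per base model

# At test time the leakage concern disappears: stack model.predict_proba(X)[:, 1]
# from each already-fitted base model and feed the result to the meta-learner.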

Exercise 4.2: LSTM for Drive Outcome Prediction

Build an LSTM to predict drive outcomes based on play sequences.

Task: Predict touchdown, field goal, punt, or turnover using play history.

import torch.nn as nn

class DriveOutcomeLSTM(nn.Module):
    """
    LSTM for predicting drive outcomes.

    Input: Sequence of plays (each play encoded as feature vector)
    Output: Probability distribution over outcomes
    """

    def __init__(self,
                 input_size: int,
                 hidden_size: int = 64,
                 num_layers: int = 2,
                 num_outcomes: int = 4):  # TD, FG, Punt, TO
        super().__init__()
        # TODO: Build LSTM architecture
        pass

    def forward(self, x, lengths=None):
        """
        Forward pass with optional variable-length sequences.
        """
        # TODO: Implement forward pass
        # Handle variable-length sequences with pack_padded_sequence
        pass


class DriveSequenceDataset:
    """Prepare drive sequences for LSTM training."""

    def __init__(self, max_length: int = 15):
        self.max_length = max_length
        self.outcome_map = {
            'touchdown': 0, 'field_goal': 1,
            'punt': 2, 'turnover': 3
        }

    def encode_play(self, play: dict) -> np.ndarray:
        """
        Encode single play as feature vector.

        Features:
        - down (normalized)
        - distance (normalized)
        - yard_line (normalized)
        - play_type (one-hot: rush, pass, other)
        - yards_gained (normalized)
        - first_down (binary)
        """
        # TODO: Implement play encoding
        pass

    def create_sequences(self, drives: pd.DataFrame):
        """
        Create padded sequences and labels.

        Returns:
        - Sequences (batch, max_length, features)
        - Lengths (actual sequence lengths)
        - Labels (drive outcomes)
        """
        # TODO: Implement sequence creation
        pass

# Training loop
# dataset = DriveSequenceDataset(max_length=15)
# X_seq, lengths, y = dataset.create_sequences(drives)
# model = DriveOutcomeLSTM(input_size=10, hidden_size=64)
# Train model...
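For orientation, the sketch below shows one way the LSTM pieces could fit together. It returns raw logits (to pair with nn.CrossEntropyLoss), expects lengths as a 1-D CPU tensor of true sequence lengths, and uses a 0.2 inter-layer dropout; all of these are assumptions, not the required design.

# Hedged sketch of one possible drive-outcome LSTM
class SketchDriveOutcomeLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, num_outcomes=4):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, dropout=0.2)  # dropout acts only between stacked layers
        self.head = nn.Linear(hidden_size, num_outcomes)

    def forward(self, x, lengths=None):
        if lengths is not None:
            # Pack padded sequences so the LSTM ignores padding positions
            x = nn.utils.rnn.pack_padded_sequence(
                x, lengths.cpu(), batch_first=True, enforce_sorted=False)
        _, (h_n, _) = self.lstm(x)
        return self.head(h_n[-1])  # classify from the last layer's final hidden state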

Exercise 4.3: Complete ML Pipeline

Build a production-ready ML pipeline for weekly game predictions.

Task: Create end-to-end system from data preparation to prediction output.

class ProductionGamePredictor:
    """
    Production-ready game prediction pipeline.

    Components:
    1. Data preprocessing
    2. Feature engineering
    3. Model ensemble
    4. Calibration
    5. Prediction output
    """

    def __init__(self, model_config: dict):
        self.config = model_config
        self.preprocessor = None
        self.feature_engineer = None
        self.model_ensemble = None
        self.calibrator = None

    def build_pipeline(self):
        """Build complete prediction pipeline."""
        # TODO: Initialize components
        pass

    def train(self, historical_data: pd.DataFrame):
        """
        Train pipeline on historical data.

        Steps:
        1. Preprocess data
        2. Engineer features
        3. Temporal train/val split
        4. Train base models
        5. Build and fit ensemble
        6. Fit calibrator
        7. Save models
        """
        # TODO: Implement training pipeline
        pass

    def predict_week(self, upcoming_games: pd.DataFrame) -> pd.DataFrame:
        """
        Generate predictions for upcoming games.

        Returns DataFrame with:
        - game_id
        - home_team, away_team
        - home_win_prob (calibrated)
        - predicted_winner
        - confidence (high/medium/low)
        - model_agreement (% of models agreeing)
        """
        # TODO: Implement prediction pipeline
        pass

    def evaluate_predictions(self,
                             predictions: pd.DataFrame,
                             actual_results: pd.DataFrame) -> dict:
        """
        Evaluate prediction accuracy.
        """
        # TODO: Calculate accuracy, AUC, Brier, profit
        pass

    def generate_report(self, predictions: pd.DataFrame) -> str:
        """
        Generate prediction report for week.
        """
        # TODO: Create formatted report
        pass

    def save_pipeline(self, filepath: str):
        """Save trained pipeline."""
        # TODO: Serialize all components
        pass

    def load_pipeline(self, filepath: str):
        """Load trained pipeline."""
        # TODO: Deserialize all components
        pass

# Configuration
config = {
    'base_models': ['logistic', 'rf', 'xgb', 'mlp'],
    'ensemble_method': 'weighted_average',
    'calibration': 'isotonic',
    'feature_set': 'full',
    'validation_seasons': 1
}

# predictor = ProductionGamePredictor(config)
# predictor.train(historical_games)
# week_predictions = predictor.predict_week(week_12_games)
# print(predictor.generate_report(week_predictions))
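As one illustration of how predict_week might fill the confidence column from calibrated probabilities (the 0.20 and 0.10 cutoffs below are arbitrary choices, not part of the specification):

# Hedged sketch: map a calibrated home-win probability to a confidence label
def sketch_confidence(home_win_prob):
    edge = abs(home_win_prob - 0.5)
    if edge >= 0.20:
        return 'high'
    if edge >= 0.10:
        return 'medium'
    return 'low'

# model_agreement can be computed as the fraction of base models whose
# predicted winner matches the ensemble's predicted winner for that game.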

Exercise Solutions

Solutions are provided in the accompanying exercise-solutions.py file.

Submission Guidelines

  1. Code Quality: Use proper Python style, docstrings, and type hints
  2. Documentation: Explain your approach and any design decisions
  3. Testing: Include test cases demonstrating your implementation
  4. Analysis: For advanced exercises, include performance analysis
  5. Comparison: Compare your results to baseline approaches