Case Study 1: Auditing a Model for Bias

Context

A financial institution has developed a machine learning model to predict loan default risk. Before deployment, the compliance team requires a fairness audit to ensure the model does not discriminate against protected groups. Our task is to conduct this audit, identify any bias, and recommend mitigation strategies.

Dataset

We use a synthetic lending dataset with 5,000 loan applications:

Feature            Description
income             Annual income ($20K--$200K)
debt_to_income     Debt-to-income ratio (0.0--1.0)
credit_score       Credit score (300--850)
employment_years   Years of employment (0--40)
loan_amount        Requested loan amount ($5K--$500K)
num_credit_lines   Number of credit lines (1--20)
late_payments      Number of late payments (0--10)
protected_group    Binary group indicator (0 or 1)
default            Whether the loan defaulted (0 or 1)

The dataset is designed so that the protected group (group 1) has a slightly different feature distribution (lower average income and credit scores, fewer years of employment), reflecting historical inequities.
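
To verify that the generated data actually exhibits these disparities, the per-group means of the raw features can be inspected directly. The snippet below is a minimal sketch; it assumes the create_lending_dataset helper defined in the Implementation section that follows.

# Sanity check (sketch): confirm the intended per-group disparities.
# Relies on create_lending_dataset from the Implementation section below.
data = create_lending_dataset(5000)
for g in (0, 1):
    mask = data["group"] == g
    print(
        f"group {g}: mean income {data['income_raw'][mask].mean():.0f}, "
        f"mean credit score {data['credit_raw'][mask].mean():.0f}"
    )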

Implementation

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

torch.manual_seed(42)
np.random.seed(42)


def create_lending_dataset(
    num_samples: int = 5000,
) -> dict[str, torch.Tensor]:
    """Create a synthetic lending dataset with group disparities.

    Group 1 has lower average income and credit scores,
    reflecting historical disadvantage.
    """
    group = torch.bernoulli(torch.full((num_samples,), 0.3)).long()

    # Features with group-correlated differences
    income_base = torch.where(
        group == 0,
        torch.normal(75000, 25000, (num_samples,)),
        torch.normal(55000, 20000, (num_samples,)),
    ).clamp(20000, 200000)

    credit_base = torch.where(
        group == 0,
        torch.normal(700, 60, (num_samples,)),
        torch.normal(660, 70, (num_samples,)),
    ).clamp(300, 850)

    employment = torch.where(
        group == 0,
        torch.normal(12, 6, (num_samples,)),
        torch.normal(8, 5, (num_samples,)),
    ).clamp(0, 40)

    dti = torch.normal(0.35, 0.15, (num_samples,)).clamp(0.0, 1.0)
    loan_amount = torch.normal(150000, 80000, (num_samples,)).clamp(5000, 500000)
    credit_lines = torch.poisson(torch.full((num_samples,), 5.0)).clamp(1, 20)
    late_payments = torch.poisson(torch.full((num_samples,), 1.5)).clamp(0, 10)

    # Default probability (true relationship)
    default_logit = (
        -0.00003 * income_base
        - 0.005 * credit_base
        + 2.0 * dti
        - 0.05 * employment
        + 0.000002 * loan_amount
        + 0.2 * late_payments
        + 3.0
        + 0.5 * torch.randn(num_samples)
    )
    default = (torch.sigmoid(default_logit) > 0.5).long()

    # Normalize features for model input
    features = torch.stack([
        (income_base - 70000) / 30000,
        dti,
        (credit_base - 680) / 80,
        employment / 15,
        (loan_amount - 150000) / 80000,
        credit_lines / 10,
        late_payments / 5,
    ], dim=1)

    return {
        "features": features,
        "labels": default,
        "group": group,
        "income_raw": income_base,
        "credit_raw": credit_base,
    }


class LoanDefaultModel(nn.Module):
    """Neural network for loan default prediction."""

    def __init__(self, input_dim: int = 7) -> None:
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 2),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)


def compute_metrics_by_group(
    predictions: torch.Tensor,
    labels: torch.Tensor,
    group: torch.Tensor,
) -> dict[str, dict[str, float]]:
    """Compute accuracy, TPR, FPR, precision for each group."""
    results = {}
    for g in [0, 1]:
        mask = group == g
        pred_g = predictions[mask]
        label_g = labels[mask]

        tp = ((pred_g == 1) & (label_g == 1)).sum().float()
        fp = ((pred_g == 1) & (label_g == 0)).sum().float()
        fn = ((pred_g == 0) & (label_g == 1)).sum().float()
        tn = ((pred_g == 0) & (label_g == 0)).sum().float()

        results[f"group_{g}"] = {
            "count": mask.sum().item(),
            "base_rate": label_g.float().mean().item(),
            "accuracy": ((tp + tn) / (tp + fp + fn + tn)).item(),
            "tpr": (tp / (tp + fn + 1e-8)).item(),
            "fpr": (fp / (fp + tn + 1e-8)).item(),
            "precision": (tp / (tp + fp + 1e-8)).item(),
            "positive_rate": pred_g.float().mean().item(),
        }
    return results


def compute_fairness_metrics(
    group_metrics: dict[str, dict[str, float]],
) -> dict[str, float]:
    """Compute fairness gap metrics from per-group metrics."""
    g0 = group_metrics["group_0"]
    g1 = group_metrics["group_1"]
    return {
        "demographic_parity_gap": abs(g0["positive_rate"] - g1["positive_rate"]),
        "equal_opportunity_gap": abs(g0["tpr"] - g1["tpr"]),
        "equalized_odds_gap": max(
            abs(g0["tpr"] - g1["tpr"]),
            abs(g0["fpr"] - g1["fpr"]),
        ),
        "accuracy_gap": abs(g0["accuracy"] - g1["accuracy"]),
    }


def threshold_adjustment_for_equal_opportunity(
    scores: torch.Tensor,
    labels: torch.Tensor,
    group: torch.Tensor,
    base_threshold: float = 0.5,
) -> dict[int, float]:
    """Find per-group thresholds that equalize true positive rates.

    Args:
        scores: Model probability scores for positive class [N].
        labels: True labels [N].
        group: Group indicators [N].
        base_threshold: Starting threshold for group 0.

    Returns:
        Dict mapping group index to optimal threshold.
    """
    # Fix threshold for group 0
    g0_mask = group == 0
    g0_positives = labels[g0_mask] == 1
    if g0_positives.sum() > 0:
        target_tpr = (scores[g0_mask][g0_positives] >= base_threshold).float().mean().item()
    else:
        target_tpr = 0.5

    # Find threshold for group 1 that matches the target TPR
    g1_mask = group == 1
    g1_positives = labels[g1_mask] == 1

    best_threshold = base_threshold
    best_diff = float("inf")

    # Sweep candidate thresholds for group 1; skip the search if it has no positives.
    if g1_positives.sum() > 0:
        for threshold in np.arange(0.1, 0.9, 0.01):
            tpr = (scores[g1_mask][g1_positives] >= threshold).float().mean().item()
            diff = abs(tpr - target_tpr)
            if diff < best_diff:
                best_diff = diff
                best_threshold = threshold

    return {0: base_threshold, 1: best_threshold}


def run_bias_audit() -> None:
    """Run a complete bias audit on the loan default model."""
    print("=" * 60)
    print("Bias Audit: Loan Default Prediction Model")
    print("=" * 60)

    # Create dataset
    data = create_lending_dataset(5000)
    X, y, g = data["features"], data["labels"], data["group"]

    # Split
    n_train = 3500
    X_train, y_train, g_train = X[:n_train], y[:n_train], g[:n_train]
    X_test, y_test, g_test = X[n_train:], y[n_train:], g[n_train:]

    print(f"\nDataset: {len(X)} samples")
    print(f"Group 0: {(g == 0).sum().item()}, Group 1: {(g == 1).sum().item()}")
    print(f"Default rate Group 0: {y[g == 0].float().mean():.3f}")
    print(f"Default rate Group 1: {y[g == 1].float().mean():.3f}")

    # Train model (intentionally NOT including group as a feature)
    model = LoanDefaultModel()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for _ in range(200):
        optimizer.zero_grad()
        F.cross_entropy(model(X_train), y_train).backward()
        optimizer.step()

    # Evaluate
    model.eval()
    with torch.no_grad():
        test_logits = model(X_test)
        test_probs = F.softmax(test_logits, dim=1)[:, 1]
        test_preds = test_logits.argmax(dim=1)
        overall_acc = (test_preds == y_test).float().mean()

    print(f"\nOverall test accuracy: {overall_acc:.4f}")

    # Per-group metrics
    group_results = compute_metrics_by_group(test_preds, y_test, g_test)
    fairness_gaps = compute_fairness_metrics(group_results)

    print("\n--- Per-Group Metrics ---")
    for group_name, metrics in group_results.items():
        print(f"\n{group_name} (n={metrics['count']}):")
        for k, v in metrics.items():
            if k != "count":
                print(f"  {k:20s}: {v:.4f}")

    print("\n--- Fairness Gaps ---")
    for metric, value in fairness_gaps.items():
        status = "PASS" if value < 0.1 else "FAIL"
        print(f"  {metric:30s}: {value:.4f}  [{status}]")

    # Threshold adjustment
    print("\n--- Post-Processing: Threshold Adjustment ---")
    thresholds = threshold_adjustment_for_equal_opportunity(
        test_probs, y_test, g_test
    )
    print(f"Adjusted thresholds: {thresholds}")

    # Re-evaluate with adjusted thresholds
    adjusted_preds = torch.zeros_like(test_preds)
    for g_val, thresh in thresholds.items():
        mask = g_test == g_val
        adjusted_preds[mask] = (test_probs[mask] >= thresh).long()

    adjusted_group_results = compute_metrics_by_group(adjusted_preds, y_test, g_test)
    adjusted_fairness = compute_fairness_metrics(adjusted_group_results)

    print("\nAfter threshold adjustment:")
    adjusted_acc = (adjusted_preds == y_test).float().mean()
    print(f"Overall accuracy: {adjusted_acc:.4f} (was {overall_acc:.4f})")
    for metric, value in adjusted_fairness.items():
        old = fairness_gaps[metric]
        print(f"  {metric:30s}: {value:.4f} (was {old:.4f})")


if __name__ == "__main__":
    run_bias_audit()

Results

Pre-Mitigation Audit Findings

The audit reveals systematic disparities:

  - Group 1 has a higher false positive rate (more applicants incorrectly flagged as defaulters).
  - Group 1 has a lower true positive rate (more actual defaulters missed).
  - The demographic parity gap exceeds the 10% threshold.
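
For reference, the gaps computed by compute_fairness_metrics correspond to the standard group-fairness definitions, with A the group indicator and \hat{Y} the model's decision:

\[
\begin{aligned}
\text{demographic parity gap} &= \bigl|\,P(\hat{Y}=1 \mid A=0) - P(\hat{Y}=1 \mid A=1)\,\bigr| \\
\text{equal opportunity gap}  &= \bigl|\,\mathrm{TPR}_0 - \mathrm{TPR}_1\,\bigr| \\
\text{equalized odds gap}     &= \max\bigl(\,|\mathrm{TPR}_0 - \mathrm{TPR}_1|,\ |\mathrm{FPR}_0 - \mathrm{FPR}_1|\,\bigr)
\end{aligned}
\]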

Post-Mitigation Results

Threshold adjustment reduces the equal opportunity gap from ~15% to ~3%, at the cost of approximately 2% overall accuracy. The demographic parity gap is also reduced.

Lessons Learned

  1. Even without protected attributes, models can discriminate: The model relies on income and credit score as proxies, because both are correlated with group membership (a simple probe check is sketched after this list).
  2. Disaggregated evaluation is essential: Overall accuracy masks significant group-level disparities.
  3. Post-processing is the simplest intervention: Threshold adjustment is easy to implement and explain, but addresses symptoms rather than root causes.
  4. The fairness-accuracy trade-off is real but often small: A 2% accuracy reduction for significant fairness improvement is typically acceptable.
  5. Document everything: A complete audit trail---data statistics, model decisions, fairness metrics, and mitigation choices---is essential for regulatory compliance.
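
As a rough way to quantify the proxy effect from lesson 1, one can train a simple probe to predict group membership from the model's input features alone; accuracy well above the majority-class baseline indicates that the features carry group information. The sketch below is illustrative only (it evaluates on the same data it trains on) and reuses create_lending_dataset from the Implementation section.

# Proxy probe (illustrative sketch): can the input features recover group membership?
import torch
import torch.nn.functional as F

data = create_lending_dataset(5000)
X, g = data["features"], data["group"]

probe = torch.nn.Linear(X.shape[1], 2)    # linear probe: features -> group
optimizer = torch.optim.Adam(probe.parameters(), lr=1e-2)
for _ in range(300):                      # full-batch training, as in run_bias_audit
    optimizer.zero_grad()
    F.cross_entropy(probe(X), g).backward()
    optimizer.step()

with torch.no_grad():
    probe_acc = (probe(X).argmax(dim=1) == g).float().mean()
majority = max((g == 0).float().mean().item(), (g == 1).float().mean().item())
print(f"Probe accuracy: {probe_acc:.3f} (majority baseline: {majority:.3f})")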