Case Study 1: Auditing a Model for Bias
Context
A financial institution has developed a machine learning model to predict loan default risk. Before deployment, the compliance team requires a fairness audit to ensure the model does not discriminate against protected groups. Our task is to conduct this audit, identify any bias, and recommend mitigation strategies.
Dataset
We use a synthetic lending dataset with 5,000 loan applications:
| Feature | Description |
|---|---|
| income | Annual income ($20K--$200K) |
| debt_to_income | Debt-to-income ratio (0.0--1.0) |
| credit_score | Credit score (300--850) |
| employment_years | Years of employment (0--40) |
| loan_amount | Requested loan amount ($5K--$500K) |
| num_credit_lines | Number of credit lines (1--20) |
| late_payments | Number of late payments (0--10) |
| protected_group | Binary group indicator (0 or 1) |
| default | Whether the loan defaulted (0 or 1) |
The dataset is designed so that the protected group (group 1) has a different feature distribution (lower average income, lower credit scores, fewer years of employment), reflecting historical inequities.
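As a quick sanity check, these designed disparities can be confirmed by comparing group-level summary statistics. The sketch below is illustrative and assumes the `create_lending_dataset` helper defined in the implementation that follows.

```python
# Sanity check: confirm the designed group disparities in the synthetic data.
# Assumes create_lending_dataset from the implementation below.
data = create_lending_dataset(5000)
for g in (0, 1):
    mask = data["group"] == g
    print(
        f"group {g}: n={mask.sum().item()}, "
        f"mean income {data['income_raw'][mask].mean():.0f}, "
        f"mean credit score {data['credit_raw'][mask].mean():.0f}, "
        f"default rate {data['labels'][mask].float().mean():.3f}"
    )
```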
Implementation
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
torch.manual_seed(42)
np.random.seed(42)
def create_lending_dataset(
num_samples: int = 5000,
) -> dict[str, torch.Tensor]:
"""Create a synthetic lending dataset with group disparities.
Group 1 has lower average income and credit scores,
reflecting historical disadvantage.
"""
group = torch.bernoulli(torch.full((num_samples,), 0.3)).long()
# Features with group-correlated differences
income_base = torch.where(
group == 0,
torch.normal(75000, 25000, (num_samples,)),
torch.normal(55000, 20000, (num_samples,)),
).clamp(20000, 200000)
credit_base = torch.where(
group == 0,
torch.normal(700, 60, (num_samples,)),
torch.normal(660, 70, (num_samples,)),
).clamp(300, 850)
employment = torch.where(
group == 0,
torch.normal(12, 6, (num_samples,)),
torch.normal(8, 5, (num_samples,)),
).clamp(0, 40)
dti = torch.normal(0.35, 0.15, (num_samples,)).clamp(0.0, 1.0)
loan_amount = torch.normal(150000, 80000, (num_samples,)).clamp(5000, 500000)
credit_lines = torch.poisson(torch.full((num_samples,), 5.0)).clamp(1, 20)
late_payments = torch.poisson(torch.full((num_samples,), 1.5)).clamp(0, 10)
    # True default relationship: a noisy logit, thresholded below into binary outcomes
default_logit = (
-0.00003 * income_base
- 0.005 * credit_base
+ 2.0 * dti
- 0.05 * employment
+ 0.000002 * loan_amount
+ 0.2 * late_payments
+ 3.0
+ 0.5 * torch.randn(num_samples)
)
default = (torch.sigmoid(default_logit) > 0.5).long()
# Normalize features for model input
features = torch.stack([
(income_base - 70000) / 30000,
dti,
(credit_base - 680) / 80,
employment / 15,
(loan_amount - 150000) / 80000,
credit_lines / 10,
late_payments / 5,
], dim=1)
return {
"features": features,
"labels": default,
"group": group,
"income_raw": income_base,
"credit_raw": credit_base,
}
class LoanDefaultModel(nn.Module):
"""Neural network for loan default prediction."""
def __init__(self, input_dim: int = 7) -> None:
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_dim, 32),
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(32, 16),
nn.ReLU(),
nn.Linear(16, 2),
)
def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.net(x)
def compute_metrics_by_group(
predictions: torch.Tensor,
labels: torch.Tensor,
group: torch.Tensor,
) -> dict[str, dict[str, float]]:
"""Compute accuracy, TPR, FPR, precision for each group."""
results = {}
for g in [0, 1]:
mask = group == g
pred_g = predictions[mask]
label_g = labels[mask]
tp = ((pred_g == 1) & (label_g == 1)).sum().float()
fp = ((pred_g == 1) & (label_g == 0)).sum().float()
fn = ((pred_g == 0) & (label_g == 1)).sum().float()
tn = ((pred_g == 0) & (label_g == 0)).sum().float()
results[f"group_{g}"] = {
"count": mask.sum().item(),
"base_rate": label_g.float().mean().item(),
"accuracy": ((tp + tn) / (tp + fp + fn + tn)).item(),
"tpr": (tp / (tp + fn + 1e-8)).item(),
"fpr": (fp / (fp + tn + 1e-8)).item(),
"precision": (tp / (tp + fp + 1e-8)).item(),
"positive_rate": pred_g.float().mean().item(),
}
return results
def compute_fairness_metrics(
group_metrics: dict[str, dict[str, float]],
) -> dict[str, float]:
"""Compute fairness gap metrics from per-group metrics."""
g0 = group_metrics["group_0"]
g1 = group_metrics["group_1"]
return {
"demographic_parity_gap": abs(g0["positive_rate"] - g1["positive_rate"]),
"equal_opportunity_gap": abs(g0["tpr"] - g1["tpr"]),
"equalized_odds_gap": max(
abs(g0["tpr"] - g1["tpr"]),
abs(g0["fpr"] - g1["fpr"]),
),
"accuracy_gap": abs(g0["accuracy"] - g1["accuracy"]),
}
def threshold_adjustment_for_equal_opportunity(
scores: torch.Tensor,
labels: torch.Tensor,
group: torch.Tensor,
base_threshold: float = 0.5,
) -> dict[int, float]:
"""Find per-group thresholds that equalize true positive rates.
Args:
scores: Model probability scores for positive class [N].
labels: True labels [N].
group: Group indicators [N].
base_threshold: Starting threshold for group 0.
Returns:
Dict mapping group index to optimal threshold.
"""
# Fix threshold for group 0
g0_mask = group == 0
g0_positives = labels[g0_mask] == 1
if g0_positives.sum() > 0:
target_tpr = (scores[g0_mask][g0_positives] >= base_threshold).float().mean().item()
else:
target_tpr = 0.5
# Find threshold for group 1 that matches the target TPR
g1_mask = group == 1
g1_positives = labels[g1_mask] == 1
    best_threshold = base_threshold
    best_diff = float("inf")
    if g1_positives.sum() > 0:
        for threshold in np.arange(0.1, 0.9, 0.01):
            tpr = (scores[g1_mask][g1_positives] >= threshold).float().mean().item()
            diff = abs(tpr - target_tpr)
            if diff < best_diff:
                best_diff = diff
                best_threshold = float(threshold)
    return {0: base_threshold, 1: best_threshold}
def run_bias_audit() -> None:
"""Run a complete bias audit on the loan default model."""
print("=" * 60)
print("Bias Audit: Loan Default Prediction Model")
print("=" * 60)
# Create dataset
data = create_lending_dataset(5000)
X, y, g = data["features"], data["labels"], data["group"]
# Split
n_train = 3500
X_train, y_train, g_train = X[:n_train], y[:n_train], g[:n_train]
X_test, y_test, g_test = X[n_train:], y[n_train:], g[n_train:]
print(f"\nDataset: {len(X)} samples")
print(f"Group 0: {(g == 0).sum().item()}, Group 1: {(g == 1).sum().item()}")
print(f"Default rate Group 0: {y[g == 0].float().mean():.3f}")
print(f"Default rate Group 1: {y[g == 1].float().mean():.3f}")
# Train model (intentionally NOT including group as a feature)
model = LoanDefaultModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for _ in range(200):
optimizer.zero_grad()
F.cross_entropy(model(X_train), y_train).backward()
optimizer.step()
# Evaluate
model.eval()
with torch.no_grad():
test_logits = model(X_test)
test_probs = F.softmax(test_logits, dim=1)[:, 1]
test_preds = test_logits.argmax(dim=1)
overall_acc = (test_preds == y_test).float().mean()
print(f"\nOverall test accuracy: {overall_acc:.4f}")
# Per-group metrics
group_results = compute_metrics_by_group(test_preds, y_test, g_test)
fairness_gaps = compute_fairness_metrics(group_results)
print("\n--- Per-Group Metrics ---")
for group_name, metrics in group_results.items():
print(f"\n{group_name} (n={metrics['count']}):")
for k, v in metrics.items():
if k != "count":
print(f" {k:20s}: {v:.4f}")
print("\n--- Fairness Gaps ---")
for metric, value in fairness_gaps.items():
status = "PASS" if value < 0.1 else "FAIL"
print(f" {metric:30s}: {value:.4f} [{status}]")
# Threshold adjustment
print("\n--- Post-Processing: Threshold Adjustment ---")
thresholds = threshold_adjustment_for_equal_opportunity(
test_probs, y_test, g_test
)
print(f"Adjusted thresholds: {thresholds}")
# Re-evaluate with adjusted thresholds
adjusted_preds = torch.zeros_like(test_preds)
for g_val, thresh in thresholds.items():
mask = g_test == g_val
adjusted_preds[mask] = (test_probs[mask] >= thresh).long()
adjusted_group_results = compute_metrics_by_group(adjusted_preds, y_test, g_test)
adjusted_fairness = compute_fairness_metrics(adjusted_group_results)
print("\nAfter threshold adjustment:")
adjusted_acc = (adjusted_preds == y_test).float().mean()
print(f"Overall accuracy: {adjusted_acc:.4f} (was {overall_acc:.4f})")
for metric, value in adjusted_fairness.items():
old = fairness_gaps[metric]
print(f" {metric:30s}: {value:.4f} (was {old:.4f})")
if __name__ == "__main__":
run_bias_audit()
Results
Pre-Mitigation Audit Findings
The audit reveals systematic disparities:
- Group 1 has a higher false positive rate (more applicants incorrectly flagged as defaulters)
- Group 1 has a lower true positive rate (more actual defaulters missed)
- The demographic parity gap exceeds the 10% threshold
Post-Mitigation Results
Threshold adjustment reduces the equal opportunity gap from ~15% to ~3%, at the cost of approximately 2% overall accuracy. The demographic parity gap is also reduced.
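The size of this trade-off can be inspected directly by sweeping the group-1 threshold and recording accuracy alongside the equal opportunity gap. A minimal sketch, assuming `test_probs`, `y_test`, and `g_test` from `run_bias_audit` are in scope together with the metric helpers defined above:

```python
# Sketch: trace the fairness-accuracy trade-off by sweeping the group-1 threshold
# while keeping the group-0 threshold fixed at 0.5.
# Assumes test_probs, y_test, g_test and the metric helpers from the audit above.
for g1_threshold in np.arange(0.30, 0.71, 0.05):
    preds = torch.where(
        g_test == 1,
        (test_probs >= g1_threshold).long(),
        (test_probs >= 0.5).long(),
    )
    acc = (preds == y_test).float().mean().item()
    gaps = compute_fairness_metrics(compute_metrics_by_group(preds, y_test, g_test))
    print(
        f"group-1 threshold {g1_threshold:.2f}: "
        f"accuracy={acc:.3f}, "
        f"equal_opportunity_gap={gaps['equal_opportunity_gap']:.3f}"
    )
```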
Lessons Learned
- Even without protected attributes, models can discriminate: The model uses income and credit score, which are correlated with group membership, as proxies (a simple proxy check is sketched after this list).
- Disaggregated evaluation is essential: Overall accuracy masks significant group-level disparities.
- Post-processing is the simplest intervention: Threshold adjustment is easy to implement and explain, but addresses symptoms rather than root causes.
- The fairness-accuracy trade-off is real but often small: A 2% accuracy reduction for significant fairness improvement is typically acceptable.
- Document everything: A complete audit trail---data statistics, model decisions, fairness metrics, and mitigation choices---is essential for regulatory compliance.
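One way to check for proxy encoding is to train a small probe that predicts group membership from the non-protected features alone; accuracy well above the majority-class baseline indicates that the features carry group information. A minimal sketch, assuming the `data` dict returned by `create_lending_dataset` is in scope:

```python
# Sketch: probe whether the non-protected features encode group membership.
# Assumes the data dict from create_lending_dataset; evaluation is in-sample,
# which is sufficient for a rough proxy check.
X, g = data["features"], data["group"]
probe = nn.Linear(X.shape[1], 2)
opt = torch.optim.Adam(probe.parameters(), lr=1e-2)
for _ in range(300):
    opt.zero_grad()
    F.cross_entropy(probe(X), g).backward()
    opt.step()
with torch.no_grad():
    probe_acc = (probe(X).argmax(dim=1) == g).float().mean().item()
baseline = max(g.float().mean().item(), 1.0 - g.float().mean().item())
print(f"group-probe accuracy: {probe_acc:.3f} (majority baseline: {baseline:.3f})")
```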