Appendix F: Python Reference — Simulation Code
The Science of Luck: Statistical Thinking, Network Theory, Serendipity Engineering, Opportunity Recognition, and the Psychology of Chance
This appendix collects all Python simulation code from the textbook, organized by chapter, with full line-by-line comments, expected output descriptions, instructions for extending each simulation, and dependency information.
All code is written for Python 3.10+. It follows PEP 8 style guidelines and uses descriptive variable names for clarity.
Required dependencies (install once):
pip install numpy matplotlib scipy networkx pandas seaborn
Or using the requirements file from the textbook repository:
pip install -r requirements.txt
Chapter 6: Probability Intuition — Basic Probability Simulations
Simulation 6.1: Coin Flip Convergence
What this demonstrates: The law of large numbers — as you flip more coins, the proportion of heads converges toward 0.5. Short sequences can look very biased.
import numpy as np
import matplotlib.pyplot as plt
def simulate_coin_flips(total_flips: int, random_seed: int = 42) -> None:
    """
    Simulate a series of fair coin flips and plot the running proportion of heads.

    Shows how small samples can appear very biased even when the underlying
    probability is exactly 0.5.

    Args:
        total_flips: Number of coin flips to simulate (must be positive)
        random_seed: For reproducibility; change to see different realizations

    Raises:
        ValueError: If total_flips is not positive.
    """
    if total_flips <= 0:
        raise ValueError(f"total_flips must be positive, got {total_flips}")
    # Set the random seed so results are reproducible
    rng = np.random.default_rng(seed=random_seed)
    # Simulate: 1 = heads, 0 = tails
    # Each flip is an independent Bernoulli trial with p = 0.5
    flips = rng.integers(low=0, high=2, size=total_flips)  # 0 or 1
    # Calculate running proportion of heads after each flip
    # cumsum gives the running total of heads; dividing by flip number gives proportion
    running_heads_count = np.cumsum(flips)
    flip_numbers = np.arange(1, total_flips + 1)
    running_proportion = running_heads_count / flip_numbers
    # Plot the results
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(flip_numbers, running_proportion,
            color='steelblue', linewidth=0.8, label='Observed proportion of heads')
    # Add the true probability line for reference
    ax.axhline(y=0.5, color='red', linestyle='--', linewidth=1.5,
               label='True probability (0.5)')
    # Add shaded region showing +/- 10% band
    ax.axhspan(0.45, 0.55, alpha=0.1, color='red', label='+/- 10% band')
    ax.set_xlabel('Number of flips', fontsize=12)
    ax.set_ylabel('Proportion of heads', fontsize=12)
    ax.set_title(f'Coin Flip Convergence: {total_flips:,} Flips\n'
                 f'Notice how early results can mislead', fontsize=13)
    ax.legend()
    ax.set_ylim(0, 1)
    ax.set_xscale('log')  # Log scale makes early volatility and late convergence both visible
    plt.tight_layout()
    plt.savefig('coin_flip_convergence.png', dpi=150, bbox_inches='tight')
    plt.show()
    # Print summary statistics
    print(f"After {total_flips:,} flips:")
    print(f" Proportion heads: {running_proportion[-1]:.4f}")
    print(f" Distance from true probability: {abs(running_proportion[-1] - 0.5):.4f}")
    # FIX: the original indexed positions 4/49/499 unconditionally, which raised
    # IndexError whenever total_flips < 500. Guard each checkpoint instead.
    print(f"\nEarly sampling:")
    for checkpoint in (5, 50, 500):
        if checkpoint <= total_flips:
            print(f" After {checkpoint} flips: {running_proportion[checkpoint - 1]:.2f} heads proportion")


if __name__ == "__main__":
    simulate_coin_flips(total_flips=10_000)
Expected output: A graph showing the proportion of heads starting volatile and converging toward 0.5 as flips accumulate. Console output showing the proportion at 5, 50, 500, and 10,000 flips.
How to extend:
- Change random_seed to see different short runs.
- Replace the rng.integers(low=0, high=2, size=total_flips) call with rng.binomial(n=1, p=0.6, size=total_flips) to simulate a coin biased 60% toward heads.
- Run multiple simulations (10 different seeds) and overlay them to show that all converge.
Simulation 6.2: Dice Roll Expected Value
import numpy as np
def dice_expected_value_simulation(num_rolls: int = 100_000) -> None:
"""
Roll a fair six-sided die many times and compare the sample mean
to the theoretical expected value of 3.5.
The expected value of a fair die = (1+2+3+4+5+6)/6 = 3.5.
No single roll can produce 3.5, but the average of many rolls converges to it.
"""
rng = np.random.default_rng(seed=0)
# Roll the die num_rolls times; integers 1 through 6 inclusive
rolls = rng.integers(low=1, high=7, size=num_rolls)
theoretical_ev = 3.5
print(f"Theoretical expected value of a fair die: {theoretical_ev}")
print(f"\nSample mean after:")
# Show convergence at multiple checkpoints
checkpoints = [10, 100, 1_000, 10_000, 100_000]
for n in checkpoints:
if n <= num_rolls:
sample_mean = np.mean(rolls[:n])
error = abs(sample_mean - theoretical_ev)
print(f" {n:>8,} rolls: mean = {sample_mean:.4f} (error: {error:.4f})")
if __name__ == "__main__":
dice_expected_value_simulation()
Expected output (values vary by seed):
Theoretical expected value of a fair die: 3.5
Sample mean after:
10 rolls: mean = 3.6000 (error: 0.1000)
100 rolls: mean = 3.4700 (error: 0.0300)
1,000 rolls: mean = 3.5010 (error: 0.0010)
10,000 rolls: mean = 3.4986 (error: 0.0014)
100,000 rolls: mean = 3.5004 (error: 0.0004)
Chapter 7: Law of Large Numbers — Multiple Trajectories
Simulation 7.1: Multiple Coin Flip Trajectories
import numpy as np
import matplotlib.pyplot as plt
def plot_multiple_coin_trajectories(
    num_trajectories: int = 10,
    num_flips: int = 1000
) -> None:
    """
    Overlay several independent fair-coin sequences on a single chart.

    Every sequence converges to 0.5 eventually, yet each takes its own path
    and can look strikingly 'lucky' or 'unlucky' early on. That is the core
    lesson: a short-run hot streak is fully compatible with a fair coin.
    """
    rng = np.random.default_rng(seed=123)
    fig, ax = plt.subplots(figsize=(12, 6))
    flip_numbers = np.arange(1, num_flips + 1)
    for sequence_number in range(1, num_trajectories + 1):
        # Fresh, independent run of fair flips for each trajectory
        flips = rng.integers(0, 2, size=num_flips)
        proportions = np.cumsum(flips) / flip_numbers
        ax.plot(flip_numbers, proportions,
                alpha=0.6,
                linewidth=0.8,
                label=f'Sequence {sequence_number}')
    # Reference line at the true heads probability
    ax.axhline(y=0.5, color='black', linestyle='--', linewidth=2,
               label='True probability (0.5)', zorder=5)
    ax.set_xlabel('Number of flips', fontsize=12)
    ax.set_ylabel('Running proportion of heads', fontsize=12)
    ax.set_title(f'{num_trajectories} Independent Coin Sequences\n'
                 'All converge to 0.5; each looks different in small samples',
                 fontsize=13)
    ax.set_ylim(0, 1)
    ax.legend(loc='upper right', fontsize=8, ncol=2)
    plt.tight_layout()
    plt.savefig('multiple_coin_trajectories.png', dpi=150)
    plt.show()


if __name__ == "__main__":
    plot_multiple_coin_trajectories()
Expected output: Ten overlapping lines starting chaotically and converging toward 0.5. Each trajectory looks like it has a personality (lucky, unlucky) — but all obey the same underlying probability.
Chapter 8: Regression to the Mean Simulation
Simulation 8.1: Test Score Regression
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def simulate_regression_to_mean(
    num_students: int = 500,
    true_ability_mean: float = 70.0,
    true_ability_std: float = 10.0,
    measurement_noise_std: float = 8.0
) -> None:
    """
    Simulate students taking two tests. Each score = true ability + random noise.

    Students who scored highest on Test 1 tend to score lower on Test 2 -- not
    because they declined, but because their Test 1 score was partly inflated
    by lucky noise that is unlikely to repeat at the same magnitude.
    This is regression to the mean: a mathematical inevitability, not a causal effect.

    Args:
        num_students: Number of simulated students.
        true_ability_mean: Mean of the stable true-ability distribution.
        true_ability_std: Spread of true ability across students.
        measurement_noise_std: Per-test random noise; larger noise produces
            stronger regression (a shallower Test1-vs-Test2 slope).
    """
    rng = np.random.default_rng(seed=42)
    # Each student has a stable true ability (it never changes between tests)
    true_abilities = rng.normal(loc=true_ability_mean,
                                scale=true_ability_std,
                                size=num_students)
    # Test 1: true ability + random noise specific to this test occasion
    test1_scores = true_abilities + rng.normal(0, measurement_noise_std, num_students)
    # Test 2: same true ability + fresh, independent noise
    test2_scores = true_abilities + rng.normal(0, measurement_noise_std, num_students)
    # Rank students by Test 1 score, descending (index 0 = highest scorer)
    test1_rank = np.argsort(test1_scores)[::-1]
    top_n = int(num_students * 0.10)         # size of the top-10% group
    bottom_start = int(num_students * 0.90)  # rank index where the bottom 10% begins
    top_t1 = test1_scores[test1_rank[:top_n]]
    top_t2 = test2_scores[test1_rank[:top_n]]
    bottom_t1 = test1_scores[test1_rank[bottom_start:]]
    bottom_t2 = test2_scores[test1_rank[bottom_start:]]
    # Scatter plot: Test 1 vs Test 2, with regression line
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    axes[0].scatter(test1_scores, test2_scores, alpha=0.3, s=15, color='steelblue')
    # Perfect correlation line (y = x): what zero measurement noise would produce
    score_range = [min(test1_scores.min(), test2_scores.min()),
                   max(test1_scores.max(), test2_scores.max())]
    axes[0].plot(score_range, score_range, 'r--', label='y = x (no regression)', linewidth=1.5)
    # Actual regression line; a fitted slope below 1 is the regression signature
    slope, intercept, r_value, _, _ = stats.linregress(test1_scores, test2_scores)
    x_line = np.linspace(score_range[0], score_range[1], 100)
    axes[0].plot(x_line, slope * x_line + intercept, 'g-',
                 label=f'Actual regression (slope={slope:.2f})', linewidth=2)
    axes[0].set_xlabel('Test 1 Score', fontsize=11)
    axes[0].set_ylabel('Test 2 Score', fontsize=11)
    axes[0].set_title('Test 1 vs Test 2\nSlope < 1 reveals regression to the mean', fontsize=11)
    axes[0].legend()
    # Bar chart: average score change for top and bottom performers
    categories = ['Top 10% by Test 1', 'Bottom 10% by Test 1']
    t1_means = [np.mean(top_t1), np.mean(bottom_t1)]
    t2_means = [np.mean(top_t2), np.mean(bottom_t2)]
    x = np.array([0, 1])
    width = 0.35  # half-offset places Test 1 / Test 2 bars side by side
    axes[1].bar(x - width/2, t1_means, width, label='Test 1', color='steelblue', alpha=0.8)
    axes[1].bar(x + width/2, t2_means, width, label='Test 2', color='coral', alpha=0.8)
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(categories, fontsize=10)
    axes[1].set_ylabel('Average Score', fontsize=11)
    axes[1].set_title('Regression to the Mean\n"Decline" and "Improvement" without real change',
                      fontsize=11)
    axes[1].legend()
    plt.suptitle('Regression to the Mean: A Statistical Inevitability', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('regression_to_mean.png', dpi=150)
    plt.show()
    print("Regression to the Mean Results")
    print("="*50)
    print(f"Top 10% — Test 1 mean: {np.mean(top_t1):.1f} | Test 2 mean: {np.mean(top_t2):.1f} "
          f"| Change: {np.mean(top_t2) - np.mean(top_t1):+.1f}")
    print(f"Bot 10% — Test 1 mean: {np.mean(bottom_t1):.1f} | Test 2 mean: {np.mean(bottom_t2):.1f} "
          f"| Change: {np.mean(bottom_t2) - np.mean(bottom_t1):+.1f}")
    print()
    print("Student ability did NOT change. The changes reflect noise, not real improvement.")


if __name__ == "__main__":
    simulate_regression_to_mean()
Chapter 9: Survivorship Bias Simulation
Simulation 9.1: The Invisible Graveyard
import numpy as np
import matplotlib.pyplot as plt
def simulate_survivorship_bias(
    num_entrepreneurs: int = 10_000,
    success_probability: float = 0.10
) -> None:
    """
    Simulate a population of entrepreneurs where success is purely random
    (the measured 'trait' has zero causal effect). Demonstrates that studying
    only survivors produces apparent trait differences that are pure artifact.

    This is why 'success advice' from successful people is systematically misleading:
    you are sampling a biased subset of all people who tried.

    Args:
        num_entrepreneurs: Size of the simulated population.
        success_probability: Fraction of the population that (randomly) succeeds.
    """
    rng = np.random.default_rng(seed=99)
    # Each entrepreneur has a trait score — drawn from same distribution regardless of outcome
    # In this simulation, the trait has ZERO causal effect on success
    trait_scores = rng.normal(loc=50, scale=15, size=num_entrepreneurs)
    # Success is purely random — completely independent of trait
    successes = rng.random(size=num_entrepreneurs) < success_probability
    survivors_traits = trait_scores[successes]    # the ~10% we'd see in success stories
    failures_traits = trait_scores[~successes]    # the invisible graveyard
    survivors_mean = np.mean(survivors_traits)
    population_mean = np.mean(trait_scores)
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    # Left: What you see if you only study survivors
    axes[0].hist(survivors_traits, bins=30, color='gold', alpha=0.8,
                 edgecolor='black', density=True)
    axes[0].axvline(x=survivors_mean, color='red', linestyle='--', linewidth=2,
                    label=f'Survivors mean: {survivors_mean:.1f}')
    axes[0].axvline(x=population_mean, color='blue', linestyle='--', linewidth=2,
                    label=f'True population mean: {population_mean:.1f}')
    axes[0].set_title(f'Biased View: Survivors Only\n(n={survivors_traits.size:,}, '
                      f'{success_probability:.0%} of population)', fontsize=11)
    axes[0].set_xlabel('Trait Score')
    axes[0].legend()
    # Right: The full picture — survivors and failures come from the SAME distribution
    axes[1].hist(failures_traits, bins=30, color='lightcoral', alpha=0.7,
                 density=True, label=f'Failures (n={failures_traits.size:,})')
    axes[1].hist(survivors_traits, bins=30, color='gold', alpha=0.7,
                 density=True, label=f'Survivors (n={survivors_traits.size:,})')
    axes[1].axvline(x=population_mean, color='blue', linestyle='--', linewidth=2,
                    label=f'Population mean: {population_mean:.1f}')
    axes[1].set_title('Full Picture: Survivors AND Failures\n'
                      '(The trait has ZERO causal effect)', fontsize=11)
    axes[1].set_xlabel('Trait Score')
    axes[1].legend()
    plt.suptitle('Survivorship Bias: Studying Winners Misleads', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('survivorship_bias.png', dpi=150)
    plt.show()
    # Because success is independent of trait, any survivor/population gap
    # is pure sampling noise — which is exactly the point of the demo.
    apparent_advantage = survivors_mean - population_mean
    print("Survivorship Bias Simulation")
    print(f"Survivors mean trait: {survivors_mean:.1f} vs. Population mean: {population_mean:.1f}")
    print(f"Apparent 'trait advantage' of survivors: {apparent_advantage:+.1f} points")
    print("This gap is 100% sampling artifact -- the trait caused nothing.")


if __name__ == "__main__":
    simulate_survivorship_bias()
Chapter 10: Expected Value Calculator
Simulation 10.1: Expected Value Decision Tool
from dataclasses import dataclass
from typing import List
@dataclass
class Outcome:
    """One possible result of a decision, with its probability and payoff."""
    description: str
    probability: float  # Must be between 0 and 1; all outcomes should sum to 1.0
    value: float        # Positive = gain, negative = loss


def calculate_expected_value(outcomes: List[Outcome]) -> float:
    """
    Return the expected value of a decision.

    EV = sum over outcomes of (probability * value). Prints a warning (but
    still computes) when the probabilities do not sum to 1.0 within 0.001.
    """
    total_prob = sum(outcome.probability for outcome in outcomes)
    if abs(total_prob - 1.0) > 0.001:
        print(f"WARNING: Probabilities sum to {total_prob:.4f}, not 1.0")
    return sum(outcome.probability * outcome.value for outcome in outcomes)


def display_ev_analysis(decision_name: str, outcomes: List[Outcome]) -> None:
    """Print a formatted expected-value table plus a plain-language verdict."""
    ev = calculate_expected_value(outcomes)
    divider = "-" * 60
    print(f"\nExpected Value Analysis: {decision_name}")
    print("=" * 60)
    print(f"{'Outcome':<30} {'P':<8} {'Value':<10} {'P x Value':<10}")
    print(divider)
    for outcome in outcomes:
        contribution = outcome.probability * outcome.value
        print(f"{outcome.description:<30} {outcome.probability:<8.3f} "
              f"{outcome.value:<10.2f} {contribution:<10.2f}")
    print(divider)
    print(f"{'EXPECTED VALUE':<50} {ev:<10.2f}")
    print()
    if ev > 0:
        print(f"Result: POSITIVE expected value (+{ev:.2f}) -- favorable bet in the long run.")
    elif ev < 0:
        print(f"Result: NEGATIVE expected value ({ev:.2f}) -- unfavorable bet in the long run.")
    else:
        print("Result: BREAK-EVEN (fair bet, no long-run advantage).")
if __name__ == "__main__":
    # Example 1: Should Priya attend a $400 conference?
    # Values below are gross payoffs; the $400 cost is netted out separately.
    display_ev_analysis(
        "Priya's Industry Conference (net of $400 cost)",
        [
            Outcome("High-value introduction leads to job offer", 0.15, 5000),
            Outcome("Useful contacts, some leads", 0.35, 800),
            Outcome("Minimal direct value", 0.50, 0),
        ]
    )
    # Subtract cost manually: gross EV = 0.15*5000 + 0.35*800 + 0.50*0 = 1030
    ev_before_cost = 0.15*5000 + 0.35*800 + 0.50*0
    print(f"Gross EV: ${ev_before_cost:.0f}, minus $400 cost = Net EV: ${ev_before_cost-400:.0f}")
    # Example 2: Dr. Yuki's poker flush draw
    flush_probability = 9 / 46  # 9 outs, 46 unseen cards
    pot_size = 200
    call_amount = 30
    display_ev_analysis(
        "Dr. Yuki's Flush Draw on the River",
        [
            Outcome("Flush completes (win pot)", flush_probability, pot_size),
            Outcome("Flush misses (lose call)", 1 - flush_probability, -call_amount),
        ]
    )
    # Example 3: Nadia's paid workshop application ($200 entry fee, $2000 prize)
    # Each outcome's value already nets out the $200 entry fee.
    display_ev_analysis(
        "Nadia's Content Creator Competition",
        [
            Outcome("Win ($2000 prize)", 0.08, 2000 - 200),
            Outcome("Top 10 (exposure value only)", 0.20, 300 - 200),
            Outcome("No placement", 0.72, -200),
        ]
    )
Chapter 11: Birthday Problem and Monty Hall
Simulation 11.1: Birthday Problem
import numpy as np
import matplotlib.pyplot as plt
def birthday_problem_simulation(max_group_size: int = 60, num_simulations: int = 10_000) -> None:
    """
    Estimate the probability of a shared birthday for different group sizes.

    The surprising result: only 23 people needed for >50% probability.

    Args:
        max_group_size: Largest group size to evaluate (curve runs from 2 to here).
        num_simulations: Number of random groups simulated per group size.
    """
    rng = np.random.default_rng(seed=777)
    group_sizes = list(range(2, max_group_size + 1))
    simulated_probs = []
    theoretical_probs = []
    for n in group_sizes:
        # Simulate num_simulations groups of size n (days numbered 1..365)
        birthdays = rng.integers(1, 366, size=(num_simulations, n))
        # FIX: vectorized duplicate detection replaces the original per-group
        # Python loop over np.unique. Sort each row; any equal adjacent pair
        # means at least one shared birthday. Same result, much faster.
        sorted_bdays = np.sort(birthdays, axis=1)
        has_shared = (np.diff(sorted_bdays, axis=1) == 0).any(axis=1)
        simulated_probs.append(float(np.mean(has_shared)))
        # Theoretical: P(shared) = 1 - P(no shared)
        # P(no shared) = 365/365 * 364/365 * ... * (365-n+1)/365
        p_no_shared = np.prod([(365 - i) / 365 for i in range(n)])
        theoretical_probs.append(1 - p_no_shared)
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(group_sizes, simulated_probs, 'steelblue', linewidth=1.5, label='Simulated')
    ax.plot(group_sizes, theoretical_probs, 'r--', linewidth=1.5, label='Theoretical')
    ax.axhline(y=0.5, color='gray', linestyle=':', linewidth=1)
    ax.axvline(x=23, color='green', linestyle=':', linewidth=1)
    ax.annotate('23 people -> 50.7%', xy=(23, 0.5), xytext=(30, 0.35),
                arrowprops=dict(arrowstyle='->'), fontsize=10, color='darkgreen')
    ax.set_xlabel('Group Size')
    ax.set_ylabel('P(at least one shared birthday)')
    ax.set_title('The Birthday Problem')
    ax.legend()
    plt.tight_layout()
    plt.savefig('birthday_problem.png', dpi=150)
    plt.show()
    print("Birthday Problem Key Values:")
    for n in [10, 20, 23, 30, 40, 50]:
        # FIX: only print checkpoints we actually computed; the original
        # raised IndexError when max_group_size < 50.
        if n <= max_group_size:
            idx = n - 2  # group_sizes starts at 2, so size n lives at index n-2
            print(f" {n} people: simulated={simulated_probs[idx]:.3f}, theoretical={theoretical_probs[idx]:.3f}")


if __name__ == "__main__":
    birthday_problem_simulation()
Simulation 11.2: Monty Hall Problem
import numpy as np
def monty_hall_simulation(num_games: int = 100_000) -> None:
"""
Simulate the Monty Hall problem.
STAY wins ~33% of the time. SWITCH wins ~67% of the time.
Counterintuitive because most people think it's 50-50 after Monty reveals.
It's not: the initial pick retains its 1/3 probability; the switch door
absorbs the remaining 2/3.
"""
rng = np.random.default_rng(seed=404)
car_locations = rng.integers(0, 3, size=num_games) # Where the car is
initial_picks = rng.integers(0, 3, size=num_games) # Player's first pick
stay_wins = 0
switch_wins = 0
for game in range(num_games):
car = car_locations[game]
pick = initial_picks[game]
# Monty opens a door: not the player's pick, not the car
can_open = [d for d in [0, 1, 2] if d != pick and d != car]
monty_opens = int(rng.choice(can_open))
# Stay strategy: player keeps original pick
if pick == car:
stay_wins += 1
# Switch strategy: player moves to the other closed door
switch_to = [d for d in [0, 1, 2] if d != pick and d != monty_opens][0]
if switch_to == car:
switch_wins += 1
print("Monty Hall Simulation Results")
print(f"Games: {num_games:,}")
print(f"STAY wins: {stay_wins:,} ({stay_wins/num_games:.1%}) [Theoretical: 33.3%]")
print(f"SWITCH wins: {switch_wins:,} ({switch_wins/num_games:.1%}) [Theoretical: 66.7%]")
print()
print("Conclusion: ALWAYS SWITCH. Switching is approximately twice as likely to win.")
if __name__ == "__main__":
monty_hall_simulation()
Expected output:
Monty Hall Simulation Results
Games: 100,000
STAY wins: 33,289 (33.3%) [Theoretical: 33.3%]
SWITCH wins: 66,711 (66.7%) [Theoretical: 66.7%]
Chapter 20: Network Graph with NetworkX
Simulation 20.1: Small-World vs. Random Network
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
def _average_path_length(G) -> float:
    """Average shortest path length of G; if G is disconnected (possible for
    the Erdos-Renyi graph), fall back to its largest connected component."""
    if nx.is_connected(G):
        return nx.average_shortest_path_length(G)
    largest_component = max(nx.connected_components(G), key=len)
    return nx.average_shortest_path_length(G.subgraph(largest_component))


def compare_network_topologies(num_nodes: int = 50, seed: int = 42) -> None:
    """
    Create and compare three network types to illustrate the Watts-Strogatz insight:
    - Regular lattice: high clustering, LONG paths
    - Small-world: high clustering, SHORT paths (achieved by a few random rewirings)
    - Random: low clustering, short paths

    The small-world network achieves what seems impossible: keeping communities tight
    while making everyone close to everyone else (like 'six degrees of separation').

    Args:
        num_nodes: Number of nodes in each generated network.
        seed: Seed shared by all three generators for reproducibility.
    """
    k_neighbors = 6  # Each node connects to k nearest neighbors in the lattice
    # 1. Regular ring lattice (no randomness: rewiring probability p = 0)
    lattice = nx.watts_strogatz_graph(n=num_nodes, k=k_neighbors, p=0.0, seed=seed)
    # 2. Small-world: rewire 5% of edges randomly (a tiny fraction)
    small_world = nx.watts_strogatz_graph(n=num_nodes, k=k_neighbors, p=0.05, seed=seed)
    # 3. Erdos-Renyi random graph with comparable edge density
    random_graph = nx.erdos_renyi_graph(n=num_nodes, p=k_neighbors/num_nodes, seed=seed)
    networks = {
        'Regular Lattice': lattice,
        'Small-World (p=0.05)': small_world,
        'Random Graph': random_graph,
    }
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    for ax, (title, G) in zip(axes, networks.items()):
        # REFACTOR: the connectivity check + largest-component fallback was
        # duplicated here and in the summary table below; both now share
        # the _average_path_length helper.
        apl = _average_path_length(G)
        cc = nx.average_clustering(G)
        pos = nx.circular_layout(G)
        nx.draw_networkx(G, pos=pos, ax=ax,
                         node_size=80, node_color='steelblue',
                         edge_color='gray', width=0.5,
                         with_labels=False, alpha=0.8)
        ax.set_title(f'{title}\nAvg path length: {apl:.2f} | Clustering: {cc:.3f}',
                     fontsize=10)
        ax.axis('off')
    plt.suptitle('Network Topologies: The Watts-Strogatz Insight', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('network_topologies.png', dpi=150)
    plt.show()
    print("Network Comparison:")
    print(f"{'Type':<25} {'Avg Path Length':<20} {'Clustering Coeff':<20}")
    print("-" * 65)
    for title, G in networks.items():
        apl = _average_path_length(G)
        cc = nx.average_clustering(G)
        print(f"{title:<25} {apl:<20.3f} {cc:<20.3f}")
    print()
    print("Key insight: Small-world rewiring (just 5% of edges) dramatically")
    print("shortens path lengths while barely reducing clustering.")
    print("This explains six degrees of separation in large social networks.")


if __name__ == "__main__":
    compare_network_topologies()
Chapter 22: Viral Spread Simulation
Simulation 22.1: Content Virality Model
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
@dataclass
class ContentPost:
    """Represents a piece of content on a social media platform."""
    quality_score: float          # Intrinsic quality 0-1 (affects sharing rate)
    initial_reach: int            # Starting audience (current followers who see it)
    viral_coefficient: float      # Average shares per viewer (k-factor; k>1 means viral)
    platform_boost_chance: float  # Probability of algorithmic amplification per cycle
def simulate_virality(post: ContentPost, num_steps: int = 10, num_sims: int = 1000,
                      seed: int = 22) -> None:
    """
    Show that the same quality content produces wildly different reach outcomes
    due to random variation in early sharing. Early luck (who shares first) gets
    amplified by the platform algorithm.

    Viral coefficient (k):
    - k < 1: spread dies out (most posts)
    - k = 1: stable plateau
    - k > 1: exponential growth

    Args:
        post: Content parameters (starting reach, k-factor, boost chance).
        num_steps: Number of sharing cycles per simulation run.
        num_sims: Number of independent simulation runs.
        seed: RNG seed for reproducibility.
    """
    # NOTE(review): post.quality_score is never read in this function, even
    # though the ContentPost field comment says it affects the sharing rate —
    # confirm whether it was meant to scale viral_coefficient.
    rng = np.random.default_rng(seed=seed)
    # One row per simulation run; column 0 holds the starting reach
    all_trajectories = np.zeros((num_sims, num_steps + 1))
    all_trajectories[:, 0] = post.initial_reach
    for sim in range(num_sims):
        reach = float(post.initial_reach)
        for step in range(1, num_steps + 1):
            # New views = Poisson(reach * viral_coefficient)
            # Poisson captures randomness: sometimes more shares, sometimes fewer
            new_views = rng.poisson(lam=max(reach * post.viral_coefficient, 0))
            # Random platform boost (algorithm picks this content up)
            if rng.random() < post.platform_boost_chance:
                boost = rng.uniform(2, 10)
                new_views = int(new_views * boost)
            # Reach is REPLACED each cycle (views this cycle), not accumulated
            reach = new_views
            all_trajectories[sim, step] = reach
    peak_reaches = np.max(all_trajectories, axis=1)
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    # Spaghetti plot of trajectories (capped at 150 lines to keep it readable)
    time_steps = range(num_steps + 1)
    for sim in range(min(150, num_sims)):
        axes[0].plot(time_steps, all_trajectories[sim], alpha=0.08,
                     color='steelblue', linewidth=0.7)
    axes[0].plot(time_steps, np.median(all_trajectories, axis=0), 'red', linewidth=2,
                 label='Median')
    axes[0].plot(time_steps, np.percentile(all_trajectories, 95, axis=0), 'green',
                 linewidth=2, linestyle='--', label='95th percentile (viral)')
    axes[0].set_xlabel('Time steps (sharing cycles)')
    axes[0].set_ylabel('Content Reach')
    axes[0].set_title(f'Content Spread: {num_sims} Simulations\n'
                      f'Same content, different luck')
    axes[0].legend()
    # Distribution of peak reach on a log scale (+1 avoids log10(0))
    log_peaks = np.log10(peak_reaches + 1)
    axes[1].hist(log_peaks, bins=40, color='steelblue', alpha=0.8, edgecolor='black')
    for pct, label, color in [(50, 'Median', 'red'), (90, '90th pct', 'orange'),
                              (99, '99th pct', 'green')]:
        val = np.percentile(peak_reaches, pct)
        axes[1].axvline(x=np.log10(val+1), color=color, linestyle='--',
                        label=f'{label}: {val:,.0f}')
    axes[1].set_xlabel('Log10(Peak Reach)')
    axes[1].set_ylabel('Number of simulations')
    axes[1].set_title('Distribution of Peak Reach\n(power law tail visible on log scale)')
    axes[1].legend()
    plt.suptitle('The Luck of Virality: Same Content, Different Outcomes', fontsize=13)
    plt.tight_layout()
    plt.savefig('virality_simulation.png', dpi=150)
    plt.show()
    print("Virality Simulation Summary:")
    for pct in [25, 50, 75, 90, 95, 99]:
        print(f" {pct}th percentile peak reach: {np.percentile(peak_reaches, pct):>10,.0f}")
    print(f"\n Mean vs. Median: {np.mean(peak_reaches):,.0f} vs. {np.median(peak_reaches):,.0f}")
    print(" Large gap = power law distribution driven by rare viral outliers.")


if __name__ == "__main__":
    # Nadia's content with moderate quality and sub-viral coefficient
    # (k = 0.15 < 1, so most runs die out; rare boosts create the outliers)
    nadia_post = ContentPost(
        quality_score=0.70,
        initial_reach=500,
        viral_coefficient=0.15,
        platform_boost_chance=0.03
    )
    simulate_virality(nadia_post, num_sims=2000)
Chapter 25: Opportunity Surface Calculator
Simulation 25.1: Opportunity Surface Scoring
from dataclasses import dataclass
from typing import List
@dataclass
class Context:
    """A context (physical or digital environment) you inhabit."""
    name: str
    frequency_per_month: int     # Sessions per month
    new_people_per_session: int  # Average new people encountered
    diversity_score: float       # How different are these people? 0-1
    engagement_depth: float      # Active participation depth 0-1


def opportunity_surface_score(contexts: List[Context]) -> None:
    """
    Calculate and display opportunity surface across all inhabited contexts.

    Opportunity Surface = sum of (frequency x new_people x diversity x engagement)

    All four factors multiply because missing any one collapses the score:
    - Frequency: can't benefit from a context you don't visit
    - New people: require new people for lucky encounters
    - Diversity: need different-circle people for weak tie benefit
    - Engagement: passive consumption doesn't create serendipity

    Args:
        contexts: Contexts to audit. May be empty or score zero everywhere.
    """
    print("Opportunity Surface Audit")
    print("=" * 70)
    print(f"{'Context':<22} {'Monthly':<10} {'Diversity':<10} {'Engage':<10} {'Score':<10}")
    print("-" * 70)
    scores = []
    total = 0.0
    for ctx in contexts:
        monthly_encounters = ctx.frequency_per_month * ctx.new_people_per_session
        score = monthly_encounters * ctx.diversity_score * ctx.engagement_depth
        scores.append((ctx.name, score))
        total += score
        print(f"{ctx.name:<22} {monthly_encounters:<10,} {ctx.diversity_score:<10.2f} "
              f"{ctx.engagement_depth:<10.2f} {score:<10.1f}")
    print("-" * 70)
    print(f"{'TOTAL OPPORTUNITY SURFACE':<52} {total:<10.1f}")
    print()
    scores_sorted = sorted(scores, key=lambda x: x[1], reverse=True)
    print("Top three opportunity sources:")
    for rank, (name, score) in enumerate(scores_sorted[:3], 1):
        # FIX: guard against division by zero. A total of 0.0 is possible
        # (e.g. every context has new_people_per_session == 0, as in the
        # 'Priya before' example), and the original crashed here.
        pct = (score / total * 100) if total else 0.0
        print(f" {rank}. {name}: {score:.1f} points ({pct:.0f}% of total)")
    print()
    # Tier thresholds for the summary verdict
    if total < 100:
        print("Score: LOW (<100). Add at least one high-diversity context immediately.")
    elif total < 500:
        print("Score: MODERATE (100-500). Increase engagement depth in top contexts.")
    elif total < 1500:
        print("Score: GOOD (500-1500). Maintain consistency and deepen new contacts.")
    else:
        print("Score: EXCELLENT (1500+). Focus quality over quantity.")
if __name__ == "__main__":
    # Priya before luck science: isolated, passive contexts with no new people
    priya_before = [
        Context("Home (isolated)", 30, 0, 0.0, 0.1),
        Context("LinkedIn (passive)", 20, 0, 0.3, 0.1),
        Context("Close friend group", 8, 0, 0.1, 0.8),
        Context("Online job boards", 20, 0, 0.4, 0.2),
    ]
    print("PRIYA -- Before:")
    opportunity_surface_score(priya_before)
    print("\n" + "="*70 + "\n")
    # Priya after applying luck architecture: frequent, diverse, engaged contexts
    priya_after = [
        Context("Industry meetups (4x/mo)", 4, 15, 0.8, 0.9),
        Context("LinkedIn (active)", 20, 3, 0.7, 0.7),
        Context("Professional Slack", 20, 2, 0.7, 0.8),
        Context("Industry conference", 1, 50, 0.9, 0.9),
        Context("1-on-1 coffee meetings", 4, 1, 0.8, 1.0),
        Context("Online course community", 8, 2, 0.6, 0.7),
        Context("Close friend group", 8, 1, 0.2, 0.8),
    ]
    print("PRIYA -- After Luck Architecture:")
    opportunity_surface_score(priya_after)
Chapter 34: Social Media Luck Metrics Tracker
Simulation 34.1: Content Performance Tracker
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
def generate_content_performance_data(num_posts: int = 60, seed: int = 34) -> pd.DataFrame:
    """
    Build a synthetic content-performance dataset (Nadia's 60-post example).

    Each post's views mix a slowly improving quality signal with heavy
    lognormal noise, plus a rare (5%) algorithmic boost that produces
    outlier results.
    """
    rng = np.random.default_rng(seed=seed)
    # One post per day starting 2025-01-01
    start_date = datetime(2025, 1, 1)
    dates = [start_date + timedelta(days=day_offset) for day_offset in range(num_posts)]
    # Quality improves linearly over the posting period (Nadia gets better)
    base_quality = np.linspace(0.4, 0.7, num_posts)
    base_reach = 500
    # Heavy-tailed per-post noise around the quality signal
    noise_multiplier = rng.lognormal(mean=0, sigma=0.8, size=num_posts)
    # ~5% of posts get an algorithmic boost of 3x-10x; everyone else gets 1x
    boost_lottery = rng.random(num_posts)
    boost_size = rng.uniform(3, 10, num_posts)
    algorithmic_boost = np.where(boost_lottery < 0.05, boost_size, 1.0)
    views = (base_quality * base_reach * noise_multiplier * algorithmic_boost).astype(int)
    # Engagement counts are random fractions of views
    likes = (views * rng.uniform(0.03, 0.08, num_posts)).astype(int)
    shares = (views * rng.uniform(0.005, 0.02, num_posts)).astype(int)
    comments = (views * rng.uniform(0.002, 0.01, num_posts)).astype(int)
    # Post type categories
    post_types = rng.choice(['Tutorial', 'Story', 'Opinion', 'Behind-the-scenes'],
                            size=num_posts, p=[0.3, 0.3, 0.2, 0.2])
    return pd.DataFrame({
        'date': dates,
        'post_type': post_types,
        'views': views,
        'likes': likes,
        'shares': shares,
        'comments': comments,
        'engagement_rate': (likes + shares + comments) / np.maximum(views, 1),
    })
def analyze_content_luck(df: pd.DataFrame) -> None:
    """
    Analyze content performance data to distinguish signal from noise.

    Applies survivorship-bias awareness and regression-to-the-mean concepts:
    prints summary statistics, per-type performance, and a regression-to-the-
    mean check, then saves a four-panel diagnostic figure to
    'content_analysis.png'.

    Args:
        df: DataFrame with columns 'date', 'post_type', 'views', 'likes',
            'shares', 'comments', 'engagement_rate' (as produced by
            generate_content_performance_data).
    """
    print("Content Performance Analysis: Nadia's 60-Post Dataset")
    print("=" * 60)
    print()
    # Overall statistics
    print("Overall Statistics:")
    print(f" Median views: {df['views'].median():,.0f}")
    print(f" Mean views: {df['views'].mean():,.0f} (higher than median = skewed by outliers)")
    print(f" Max views: {df['views'].max():,.0f}")
    print(f" % posts over 1,000 views: {(df['views'] > 1000).mean():.1%}")
    print()
    # Performance by post type
    print("Performance by Post Type:")
    type_stats = df.groupby('post_type')['views'].agg(['median', 'mean', 'count'])
    type_stats.columns = ['Median Views', 'Mean Views', 'Count']
    print(type_stats.to_string())
    print()
    # Regression to the mean: do top-performing posts predict the next post?
    df_sorted = df.sort_values('date').reset_index(drop=True)
    top_posts = df_sorted[df_sorted['views'] > df_sorted['views'].quantile(0.90)]
    # BUG FIX: the original used .loc[top_posts.index + 1], which raises
    # KeyError whenever a top-10% post is the final post (its index + 1 is
    # out of range). .reindex returns NaN for missing labels instead, and
    # dropna() then excludes them from the average.
    next_views = df_sorted['views'].reindex(top_posts.index + 1).dropna()
    print("Regression to the Mean Check:")
    print(f" Top 10% posts average views: {top_posts['views'].mean():,.0f}")
    print(f" Those posts' NEXT post average views: "
          f"{next_views.mean():,.0f}")
    print(" (Lower next-post average = regression toward the mean)")
    print()
    # Visualize
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    # Views over time with trend line
    axes[0, 0].scatter(range(len(df)), df['views'], alpha=0.6, color='steelblue', s=30)
    # Simple linear trend line (degree-1 polynomial fit)
    z = np.polyfit(range(len(df)), df['views'], 1)
    p = np.poly1d(z)
    axes[0, 0].plot(range(len(df)), p(range(len(df))), 'r--', linewidth=2, label='Trend')
    axes[0, 0].set_title('Views Over Time\n(Noise obscures the upward trend)')
    axes[0, 0].set_xlabel('Post Number')
    axes[0, 0].set_ylabel('Views')
    axes[0, 0].legend()
    # Distribution of views (shows power-law-like tail)
    axes[0, 1].hist(df['views'], bins=20, color='steelblue', alpha=0.8, edgecolor='black')
    axes[0, 1].axvline(df['views'].median(), color='red', linestyle='--', label='Median')
    axes[0, 1].axvline(df['views'].mean(), color='orange', linestyle='--', label='Mean')
    axes[0, 1].set_title('Views Distribution\n(Right-skewed: most posts low, few viral)')
    axes[0, 1].set_xlabel('Views')
    axes[0, 1].legend()
    # Views by post type (box plots).
    # NOTE: boxplot's `labels=` kwarg is deprecated in newer matplotlib
    # (renamed tick_labels); setting tick labels explicitly works everywhere.
    post_types = df['post_type'].unique()
    views_by_type = [df[df['post_type'] == pt]['views'].values for pt in post_types]
    axes[1, 0].boxplot(views_by_type)
    axes[1, 0].set_xticklabels(post_types)
    axes[1, 0].set_title('Views by Post Type')
    axes[1, 0].set_ylabel('Views')
    # Engagement rate trend, smoothed with a 7-post rolling mean
    rolling_engagement = df['engagement_rate'].rolling(window=7, min_periods=1).mean()
    axes[1, 1].plot(range(len(df)), df['engagement_rate'], alpha=0.4, color='steelblue')
    axes[1, 1].plot(range(len(df)), rolling_engagement, 'red', linewidth=2,
                    label='7-post rolling average')
    axes[1, 1].set_title('Engagement Rate Over Time')
    axes[1, 1].set_xlabel('Post Number')
    axes[1, 1].set_ylabel('Engagement Rate')
    axes[1, 1].legend()
    plt.suptitle("Nadia's Content Luck Analysis: Signal vs. Noise", fontsize=13)
    plt.tight_layout()
    plt.savefig('content_analysis.png', dpi=150)
    plt.show()
if __name__ == "__main__":
    # Build Nadia's simulated 60-post dataset, then run the full analysis.
    performance_data = generate_content_performance_data()
    analyze_content_luck(performance_data)
Chapter 36: Luck Audit Radar Chart
Simulation 36.1: Luck Audit Visualizer
import numpy as np
import matplotlib.pyplot as plt
def visualize_luck_audit(scores: dict, title: str = "Luck Audit") -> None:
    """
    Display luck audit scores as a radar (spider) chart and print a summary.

    Saves the chart to a PNG named after the title, then prints the total
    score plus the lowest-scoring (highest-leverage) and strongest domains.

    Args:
        scores: Dict mapping domain names to scores 1-10
        title: Chart title; also lowercased/underscored to form the filename

    Note:
        The maximum total is now computed as 10 * number of domains
        (the original hard-coded "/70", which was only correct for
        exactly seven domains).
    """
    domains = list(scores.keys())
    values = list(scores.values())
    n = len(domains)
    # Generalized maximum: 10 points per domain (was a hard-coded 70)
    max_total = 10 * n
    # Angles for each domain (evenly spaced around the circle)
    angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist()
    # Close the polygon by repeating the first value
    values_plot = values + [values[0]]
    angles_plot = angles + [angles[0]]
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    # User's scores
    ax.fill(angles_plot, values_plot, alpha=0.25, color='steelblue')
    ax.plot(angles_plot, values_plot, 'o-', color='steelblue', linewidth=2, label='Your scores')
    # Target line (score = 7 = "good")
    target = [7] * (n + 1)
    ax.plot(angles_plot, target, '--', color='green', alpha=0.5, linewidth=1.5,
            label='Good target (7/10)')
    # Max line
    max_line = [10] * (n + 1)
    ax.plot(angles_plot, max_line, ':', color='gray', alpha=0.3)
    ax.set_xticks(angles)
    ax.set_xticklabels(domains, fontsize=10)
    ax.set_ylim(0, 10)
    ax.set_yticks([2, 4, 6, 8, 10])
    ax.grid(color='gray', alpha=0.3)
    ax.set_title(f'{title}\nTotal: {sum(values)}/{max_total}',
                 fontsize=13, fontweight='bold', pad=20)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    filename = title.lower().replace(' ', '_').replace(':', '') + '.png'
    plt.savefig(filename, dpi=150, bbox_inches='tight')
    plt.show()
    # Text summary: lowest scores are the highest-leverage improvement targets
    total = sum(values)
    sorted_domains = sorted(scores.items(), key=lambda x: x[1])
    print(f"\n{title}: Total {total}/{max_total}")
    print(f"Highest leverage (lowest scores): {', '.join(d for d, _ in sorted_domains[:2])}")
    print(f"Strongest areas: {', '.join(d for d, _ in sorted_domains[-2:])}")
if __name__ == "__main__":
    # Priya's audit trajectory: the same seven domains, scored before and
    # after six months of applying luck-architecture practices.
    audit_domains = [
        "Network Quality",
        "Opportunity Surface",
        "Mindset",
        "Skills",
        "Attention Quality",
        "Timing Awareness",
        "Resilience",
    ]
    before_scores = [3, 4, 5, 6, 5, 4, 6]
    after_scores = [7, 8, 7, 7, 7, 6, 8]
    visualize_luck_audit(dict(zip(audit_domains, before_scores)),
                         "Priya: Before Luck Science")
    visualize_luck_audit(dict(zip(audit_domains, after_scores)),
                         "Priya: Six Months Later")
Chapter 40: Personal Luck Strategy Planner
Simulation 40.1: 90-Day Action Priority Calculator
from dataclasses import dataclass
from typing import List
@dataclass
class LuckAction:
    """A specific luck-architecture action to implement."""
    # Short human-readable statement of the concrete task
    description: str
    # Luck domain the action improves (e.g. "Network", "Skills", "Mindset")
    domain: str
    effort_required: int  # 1-10 scale
    expected_impact: int  # 1-10 scale
    timeline_days: int  # Days until results visible
    # Whether the action can be cheaply undone (used as a scoring bonus)
    reversible: bool
def priority_score(action: LuckAction) -> float:
    """
    Rank an action for scheduling; a higher score means higher priority.

    Score = (impact / effort) * reversibility_bonus * urgency_factor.
    Effort is clamped to at least 1 to avoid division by zero.
    """
    payoff_per_effort = action.expected_impact / max(action.effort_required, 1)
    # Reversible actions are slightly preferred: cheap to abandon if wrong
    bonus = 1.1 if action.reversible else 1.0
    # Sooner-visible results earn a larger urgency multiplier
    if action.timeline_days < 30:
        urgency = 1.3
    elif action.timeline_days < 60:
        urgency = 1.1
    else:
        urgency = 1.0
    return payoff_per_effort * bonus * urgency
def generate_90_day_plan(actions: List[LuckAction], person_name: str = "You") -> None:
    """
    Print a prioritized 90-day luck action plan in three 30-day phases.

    Actions are scored with priority_score and slotted into the phase whose
    day range contains their timeline_days; each phase lists at most three
    actions, highest priority first.

    BUG FIX: the original fallback (used when a phase had no timeline match)
    re-printed the overall top three actions, duplicating actions already
    scheduled in earlier phases. Scheduled actions are now tracked, and an
    empty phase is filled with the highest-priority actions not yet used.

    Args:
        actions: Candidate actions to schedule
        person_name: Label printed in the plan header
    """
    scored = sorted(((a, priority_score(a)) for a in actions),
                    key=lambda pair: pair[1], reverse=True)
    scheduled = set()  # id()s of actions already placed in an earlier phase
    print(f"\n90-Day Luck Activation Plan: {person_name}")
    print("=" * 65)
    for phase_label, day_range in [("PHASE 1 (Days 1-30)", (0, 30)),
                                   ("PHASE 2 (Days 31-60)", (30, 60)),
                                   ("PHASE 3 (Days 61-90)", (60, 91))]:
        phase_actions = [(a, s) for a, s in scored
                         if day_range[0] < a.timeline_days <= day_range[1]
                         and id(a) not in scheduled][:3]
        if not phase_actions:
            # No timeline match: fill with the best actions not yet scheduled
            phase_actions = [(a, s) for a, s in scored
                             if id(a) not in scheduled][:3]
        for a, _ in phase_actions:
            scheduled.add(id(a))
        print(f"\n{phase_label}:")
        print("-" * 65)
        for rank, (action, score) in enumerate(phase_actions, 1):
            print(f" {rank}. [{action.domain}] {action.description}")
            print(f" Impact {action.expected_impact}/10 | Effort {action.effort_required}/10 "
                  f"| Results in {action.timeline_days} days | Priority: {score:.2f}")
if __name__ == "__main__":
    # Nadia's 90-day plan at Chapter 40. Each row is
    # (description, domain, effort, impact, timeline_days, reversible).
    action_rows = [
        ("Start luck journal — daily for 30 days",
         "Mindset", 2, 7, 30, True),
        ("Attend one creator meetup/event",
         "Opportunity Surface", 3, 7, 14, True),
        ("Reactivate 5 creator weak ties with genuine messages",
         "Network", 2, 6, 14, True),
        ("Post first 'behind the scenes' serendipity hook post",
         "Opportunity Surface", 3, 6, 7, True),
        ("Join one niche community in her content area",
         "Network", 2, 7, 30, True),
        ("Deep dive course: platform algorithm mechanics",
         "Skills", 5, 8, 45, True),
        ("Establish weekly content + data review routine",
         "Attention Quality", 3, 7, 30, True),
        ("Build editorial content calendar for 60 days",
         "Timing Awareness", 4, 6, 45, True),
        ("Pitch one collaboration to mid-sized creator",
         "Network", 5, 8, 45, True),
    ]
    generate_90_day_plan([LuckAction(*row) for row in action_rows], "Nadia")
Quick Start: Dependency Verification
"""
verify_setup.py — Run this first to confirm all dependencies are installed.
Usage:
python verify_setup.py
"""
import importlib
import sys
REQUIRED = [
"numpy",
"matplotlib",
"scipy",
"networkx",
"pandas",
"seaborn",
]
def verify() -> None:
    """
    Print the Python version and the install status of every REQUIRED package.

    Ends with either a ready-to-run confirmation or the single pip command
    that installs all missing dependencies.
    """
    print(f"Python {sys.version}")
    if sys.version_info < (3, 10):
        print("WARNING: Python 3.10+ required.")
    else:
        print("Python version: OK")
    print()
    # Collect the names that fail to import rather than tracking a flag
    missing = []
    for pkg in REQUIRED:
        try:
            mod = importlib.import_module(pkg)
        except ImportError:
            print(f" MISSING {pkg} -- install: pip install {pkg}")
            missing.append(pkg)
        else:
            version = getattr(mod, "__version__", "unknown")
            print(f" OK {pkg} ({version})")
    print()
    if not missing:
        print("All dependencies installed. Ready to run simulations.")
    else:
        print("Install all missing packages:")
        print(" pip install numpy matplotlib scipy networkx pandas seaborn")
if __name__ == "__main__":
    # Entry point: run the dependency check when executed as a script.
    verify()
All code in this appendix is provided for educational use. You are encouraged to modify parameters, combine simulations, and build extensions. The best way to develop genuine probability intuition is to change the numbers, re-run the code, and ask: "Does the output match what I predicted?" When it doesn't, you've found something worth understanding.
For questions about the code or extensions, visit the companion repository linked from the textbook website.