Appendix F: Python Reference — Simulation Code
The Science of Luck: Statistical Thinking, Network Theory, Serendipity Engineering, Opportunity Recognition, and the Psychology of Chance
This appendix collects all Python simulation code from the textbook, organized by chapter, with full line-by-line comments, expected output descriptions, instructions for extending each simulation, and dependency information.
All code is written for Python 3.10+. It follows PEP 8 style guidelines and uses descriptive variable names for clarity.
Required dependencies (install once):
pip install numpy matplotlib scipy networkx pandas seaborn
Or using the requirements file from the textbook repository:
pip install -r requirements.txt
Chapter 6: Probability Intuition — Basic Probability Simulations
Simulation 6.1: Coin Flip Convergence
What this demonstrates: The law of large numbers — as you flip more coins, the proportion of heads converges toward 0.5. Short sequences can look very biased.
import numpy as np
import matplotlib.pyplot as plt
def simulate_coin_flips(total_flips: int, random_seed: int = 42) -> None:
    """
    Simulate a series of fair coin flips and plot the running proportion of heads.

    Shows how small samples can appear very biased even when the underlying
    probability is exactly 0.5.

    Args:
        total_flips: Number of coin flips to simulate (must be positive)
        random_seed: For reproducibility; change to see different realizations

    Raises:
        ValueError: If total_flips is not positive.
    """
    if total_flips <= 0:
        raise ValueError(f"total_flips must be positive, got {total_flips}")
    # Set the random seed so results are reproducible
    rng = np.random.default_rng(seed=random_seed)
    # Simulate: 1 = heads, 0 = tails
    # Each flip is an independent Bernoulli trial with p = 0.5
    flips = rng.integers(low=0, high=2, size=total_flips)  # 0 or 1
    # Calculate running proportion of heads after each flip
    # cumsum gives the running total of heads; dividing by flip number gives proportion
    running_heads_count = np.cumsum(flips)
    flip_numbers = np.arange(1, total_flips + 1)
    running_proportion = running_heads_count / flip_numbers
    # Plot the results
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(flip_numbers, running_proportion,
            color='steelblue', linewidth=0.8, label='Observed proportion of heads')
    # Add the true probability line for reference
    ax.axhline(y=0.5, color='red', linestyle='--', linewidth=1.5,
               label='True probability (0.5)')
    # Add shaded region showing +/- 10% band
    ax.axhspan(0.45, 0.55, alpha=0.1, color='red', label='+/- 10% band')
    ax.set_xlabel('Number of flips', fontsize=12)
    ax.set_ylabel('Proportion of heads', fontsize=12)
    ax.set_title(f'Coin Flip Convergence: {total_flips:,} Flips\n'
                 f'Notice how early results can mislead', fontsize=13)
    ax.legend()
    ax.set_ylim(0, 1)
    ax.set_xscale('log')  # Log scale makes early volatility and late convergence both visible
    plt.tight_layout()
    plt.savefig('coin_flip_convergence.png', dpi=150, bbox_inches='tight')
    plt.show()
    # Print summary statistics
    print(f"After {total_flips:,} flips:")
    print(f" Proportion heads: {running_proportion[-1]:.4f}")
    print(f" Distance from true probability: {abs(running_proportion[-1] - 0.5):.4f}")
    # FIX: the original indexed positions 4/49/499 unconditionally, which raised
    # IndexError whenever total_flips < 500. Guard each checkpoint instead.
    print(f"\nEarly sampling:")
    for checkpoint in (5, 50, 500):
        if checkpoint <= total_flips:
            print(f" After {checkpoint} flips: {running_proportion[checkpoint - 1]:.2f} heads proportion")


if __name__ == "__main__":
    simulate_coin_flips(total_flips=10_000)
Expected output: A graph showing the proportion of heads starting volatile and converging toward 0.5 as flips accumulate. Console output showing the proportion at 5, 50, 500, and 10,000 flips.
How to extend:
- Change random_seed to see different short runs.
- Replace the rng.integers(low=0, high=2, size=total_flips) call with rng.binomial(n=1, p=0.6, size=total_flips) to simulate a coin biased 60% toward heads.
- Run multiple simulations (10 different seeds) and overlay them to show that all converge.
Simulation 6.2: Dice Roll Expected Value
import numpy as np
def dice_expected_value_simulation(num_rolls: int = 100_000) -> None:
"""
Roll a fair six-sided die many times and compare the sample mean
to the theoretical expected value of 3.5.
The expected value of a fair die = (1+2+3+4+5+6)/6 = 3.5.
No single roll can produce 3.5, but the average of many rolls converges to it.
"""
rng = np.random.default_rng(seed=0)
# Roll the die num_rolls times; integers 1 through 6 inclusive
rolls = rng.integers(low=1, high=7, size=num_rolls)
theoretical_ev = 3.5
print(f"Theoretical expected value of a fair die: {theoretical_ev}")
print(f"\nSample mean after:")
# Show convergence at multiple checkpoints
checkpoints = [10, 100, 1_000, 10_000, 100_000]
for n in checkpoints:
if n <= num_rolls:
sample_mean = np.mean(rolls[:n])
error = abs(sample_mean - theoretical_ev)
print(f" {n:>8,} rolls: mean = {sample_mean:.4f} (error: {error:.4f})")
if __name__ == "__main__":
dice_expected_value_simulation()
Expected output (values vary by seed):
Theoretical expected value of a fair die: 3.5
Sample mean after:
10 rolls: mean = 3.6000 (error: 0.1000)
100 rolls: mean = 3.4700 (error: 0.0300)
1,000 rolls: mean = 3.5010 (error: 0.0010)
10,000 rolls: mean = 3.4986 (error: 0.0014)
100,000 rolls: mean = 3.5004 (error: 0.0004)
Chapter 7: Law of Large Numbers — Multiple Trajectories
Simulation 7.1: Multiple Coin Flip Trajectories
import numpy as np
import matplotlib.pyplot as plt
def plot_multiple_coin_trajectories(
    num_trajectories: int = 10,
    num_flips: int = 1000
) -> None:
    """
    Overlay several independent fair-coin sequences on a single chart.

    Every sequence converges to 0.5 eventually, yet each takes its own path
    and can look strikingly 'lucky' or 'unlucky' early on. That is the core
    lesson: a short-run hot streak is fully compatible with a fair coin.
    """
    rng = np.random.default_rng(seed=123)
    fig, ax = plt.subplots(figsize=(12, 6))
    flip_numbers = np.arange(1, num_flips + 1)
    for sequence_number in range(1, num_trajectories + 1):
        # Fresh, independent run of fair flips for each trajectory
        flips = rng.integers(0, 2, size=num_flips)
        proportions = np.cumsum(flips) / flip_numbers
        ax.plot(flip_numbers, proportions,
                alpha=0.6,
                linewidth=0.8,
                label=f'Sequence {sequence_number}')
    # Reference line at the true heads probability
    ax.axhline(y=0.5, color='black', linestyle='--', linewidth=2,
               label='True probability (0.5)', zorder=5)
    ax.set_xlabel('Number of flips', fontsize=12)
    ax.set_ylabel('Running proportion of heads', fontsize=12)
    ax.set_title(f'{num_trajectories} Independent Coin Sequences\n'
                 'All converge to 0.5; each looks different in small samples',
                 fontsize=13)
    ax.set_ylim(0, 1)
    ax.legend(loc='upper right', fontsize=8, ncol=2)
    plt.tight_layout()
    plt.savefig('multiple_coin_trajectories.png', dpi=150)
    plt.show()


if __name__ == "__main__":
    plot_multiple_coin_trajectories()
Expected output: Ten overlapping lines starting chaotically and converging toward 0.5. Each trajectory looks like it has a personality (lucky, unlucky) — but all obey the same underlying probability.
Chapter 8: Regression to the Mean Simulation
Simulation 8.1: Test Score Regression
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
def simulate_regression_to_mean(
    num_students: int = 500,
    true_ability_mean: float = 70.0,
    true_ability_std: float = 10.0,
    measurement_noise_std: float = 8.0
) -> None:
    """
    Simulate students taking two tests. Each score = true ability + random noise.

    Students who scored highest on Test 1 tend to score lower on Test 2 -- not
    because they declined, but because their Test 1 score was partly inflated
    by lucky noise that is unlikely to repeat at the same magnitude.
    This is regression to the mean: a mathematical inevitability, not a causal effect.

    Args:
        num_students: Number of simulated students.
        true_ability_mean: Mean of the stable true-ability distribution.
        true_ability_std: Spread of true ability across students.
        measurement_noise_std: Per-test random noise; larger noise produces
            stronger regression (a shallower Test1-vs-Test2 slope).
    """
    rng = np.random.default_rng(seed=42)
    # Each student has a stable true ability (it never changes between tests)
    true_abilities = rng.normal(loc=true_ability_mean,
                                scale=true_ability_std,
                                size=num_students)
    # Test 1: true ability + random noise specific to this test occasion
    test1_scores = true_abilities + rng.normal(0, measurement_noise_std, num_students)
    # Test 2: same true ability + fresh, independent noise
    test2_scores = true_abilities + rng.normal(0, measurement_noise_std, num_students)
    # Rank students by Test 1 score, descending (index 0 = highest scorer)
    test1_rank = np.argsort(test1_scores)[::-1]
    top_n = int(num_students * 0.10)         # size of the top-10% group
    bottom_start = int(num_students * 0.90)  # rank index where the bottom 10% begins
    top_t1 = test1_scores[test1_rank[:top_n]]
    top_t2 = test2_scores[test1_rank[:top_n]]
    bottom_t1 = test1_scores[test1_rank[bottom_start:]]
    bottom_t2 = test2_scores[test1_rank[bottom_start:]]
    # Scatter plot: Test 1 vs Test 2, with regression line
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    axes[0].scatter(test1_scores, test2_scores, alpha=0.3, s=15, color='steelblue')
    # Perfect correlation line (y = x): what zero measurement noise would produce
    score_range = [min(test1_scores.min(), test2_scores.min()),
                   max(test1_scores.max(), test2_scores.max())]
    axes[0].plot(score_range, score_range, 'r--', label='y = x (no regression)', linewidth=1.5)
    # Actual regression line; a fitted slope below 1 is the regression signature
    slope, intercept, r_value, _, _ = stats.linregress(test1_scores, test2_scores)
    x_line = np.linspace(score_range[0], score_range[1], 100)
    axes[0].plot(x_line, slope * x_line + intercept, 'g-',
                 label=f'Actual regression (slope={slope:.2f})', linewidth=2)
    axes[0].set_xlabel('Test 1 Score', fontsize=11)
    axes[0].set_ylabel('Test 2 Score', fontsize=11)
    axes[0].set_title('Test 1 vs Test 2\nSlope < 1 reveals regression to the mean', fontsize=11)
    axes[0].legend()
    # Bar chart: average score change for top and bottom performers
    categories = ['Top 10% by Test 1', 'Bottom 10% by Test 1']
    t1_means = [np.mean(top_t1), np.mean(bottom_t1)]
    t2_means = [np.mean(top_t2), np.mean(bottom_t2)]
    x = np.array([0, 1])
    width = 0.35  # half-offset places Test 1 / Test 2 bars side by side
    axes[1].bar(x - width/2, t1_means, width, label='Test 1', color='steelblue', alpha=0.8)
    axes[1].bar(x + width/2, t2_means, width, label='Test 2', color='coral', alpha=0.8)
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(categories, fontsize=10)
    axes[1].set_ylabel('Average Score', fontsize=11)
    axes[1].set_title('Regression to the Mean\n"Decline" and "Improvement" without real change',
                      fontsize=11)
    axes[1].legend()
    plt.suptitle('Regression to the Mean: A Statistical Inevitability', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('regression_to_mean.png', dpi=150)
    plt.show()
    print("Regression to the Mean Results")
    print("="*50)
    print(f"Top 10% — Test 1 mean: {np.mean(top_t1):.1f} | Test 2 mean: {np.mean(top_t2):.1f} "
          f"| Change: {np.mean(top_t2) - np.mean(top_t1):+.1f}")
    print(f"Bot 10% — Test 1 mean: {np.mean(bottom_t1):.1f} | Test 2 mean: {np.mean(bottom_t2):.1f} "
          f"| Change: {np.mean(bottom_t2) - np.mean(bottom_t1):+.1f}")
    print()
    print("Student ability did NOT change. The changes reflect noise, not real improvement.")


if __name__ == "__main__":
    simulate_regression_to_mean()
Chapter 9: Survivorship Bias Simulation
Simulation 9.1: The Invisible Graveyard
import numpy as np
import matplotlib.pyplot as plt
def simulate_survivorship_bias(
    num_entrepreneurs: int = 10_000,
    success_probability: float = 0.10
) -> None:
    """
    Simulate a population of entrepreneurs where success is purely random
    (the measured 'trait' has zero causal effect). Demonstrates that studying
    only survivors produces apparent trait differences that are pure artifact.

    This is why 'success advice' from successful people is systematically misleading:
    you are sampling a biased subset of all people who tried.

    Args:
        num_entrepreneurs: Size of the simulated population.
        success_probability: Fraction of the population that (randomly) succeeds.
    """
    rng = np.random.default_rng(seed=99)
    # Each entrepreneur has a trait score — drawn from same distribution regardless of outcome
    # In this simulation, the trait has ZERO causal effect on success
    trait_scores = rng.normal(loc=50, scale=15, size=num_entrepreneurs)
    # Success is purely random — completely independent of trait
    successes = rng.random(size=num_entrepreneurs) < success_probability
    survivors_traits = trait_scores[successes]    # the ~10% we'd see in success stories
    failures_traits = trait_scores[~successes]    # the invisible graveyard
    survivors_mean = np.mean(survivors_traits)
    population_mean = np.mean(trait_scores)
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    # Left: What you see if you only study survivors
    axes[0].hist(survivors_traits, bins=30, color='gold', alpha=0.8,
                 edgecolor='black', density=True)
    axes[0].axvline(x=survivors_mean, color='red', linestyle='--', linewidth=2,
                    label=f'Survivors mean: {survivors_mean:.1f}')
    axes[0].axvline(x=population_mean, color='blue', linestyle='--', linewidth=2,
                    label=f'True population mean: {population_mean:.1f}')
    axes[0].set_title(f'Biased View: Survivors Only\n(n={survivors_traits.size:,}, '
                      f'{success_probability:.0%} of population)', fontsize=11)
    axes[0].set_xlabel('Trait Score')
    axes[0].legend()
    # Right: The full picture — survivors and failures come from the SAME distribution
    axes[1].hist(failures_traits, bins=30, color='lightcoral', alpha=0.7,
                 density=True, label=f'Failures (n={failures_traits.size:,})')
    axes[1].hist(survivors_traits, bins=30, color='gold', alpha=0.7,
                 density=True, label=f'Survivors (n={survivors_traits.size:,})')
    axes[1].axvline(x=population_mean, color='blue', linestyle='--', linewidth=2,
                    label=f'Population mean: {population_mean:.1f}')
    axes[1].set_title('Full Picture: Survivors AND Failures\n'
                      '(The trait has ZERO causal effect)', fontsize=11)
    axes[1].set_xlabel('Trait Score')
    axes[1].legend()
    plt.suptitle('Survivorship Bias: Studying Winners Misleads', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('survivorship_bias.png', dpi=150)
    plt.show()
    # Because success is independent of trait, any survivor/population gap
    # is pure sampling noise — which is exactly the point of the demo.
    apparent_advantage = survivors_mean - population_mean
    print("Survivorship Bias Simulation")
    print(f"Survivors mean trait: {survivors_mean:.1f} vs. Population mean: {population_mean:.1f}")
    print(f"Apparent 'trait advantage' of survivors: {apparent_advantage:+.1f} points")
    print("This gap is 100% sampling artifact -- the trait caused nothing.")


if __name__ == "__main__":
    simulate_survivorship_bias()
Chapter 10: Expected Value Calculator
Simulation 10.1: Expected Value Decision Tool
from dataclasses import dataclass
from typing import List
@dataclass
class Outcome:
    """One possible result of a decision, with its probability and payoff."""
    description: str
    probability: float  # Must be between 0 and 1; all outcomes should sum to 1.0
    value: float        # Positive = gain, negative = loss


def calculate_expected_value(outcomes: List[Outcome]) -> float:
    """
    Return the expected value of a decision.

    EV = sum over outcomes of (probability * value). Prints a warning (but
    still computes) when the probabilities do not sum to 1.0 within 0.001.
    """
    total_prob = sum(outcome.probability for outcome in outcomes)
    if abs(total_prob - 1.0) > 0.001:
        print(f"WARNING: Probabilities sum to {total_prob:.4f}, not 1.0")
    return sum(outcome.probability * outcome.value for outcome in outcomes)


def display_ev_analysis(decision_name: str, outcomes: List[Outcome]) -> None:
    """Print a formatted expected-value table plus a plain-language verdict."""
    ev = calculate_expected_value(outcomes)
    divider = "-" * 60
    print(f"\nExpected Value Analysis: {decision_name}")
    print("=" * 60)
    print(f"{'Outcome':<30} {'P':<8} {'Value':<10} {'P x Value':<10}")
    print(divider)
    for outcome in outcomes:
        contribution = outcome.probability * outcome.value
        print(f"{outcome.description:<30} {outcome.probability:<8.3f} "
              f"{outcome.value:<10.2f} {contribution:<10.2f}")
    print(divider)
    print(f"{'EXPECTED VALUE':<50} {ev:<10.2f}")
    print()
    if ev > 0:
        print(f"Result: POSITIVE expected value (+{ev:.2f}) -- favorable bet in the long run.")
    elif ev < 0:
        print(f"Result: NEGATIVE expected value ({ev:.2f}) -- unfavorable bet in the long run.")
    else:
        print("Result: BREAK-EVEN (fair bet, no long-run advantage).")
if __name__ == "__main__":
    # Example 1: Should Priya attend a $400 conference?
    # Values below are gross payoffs; the $400 cost is netted out separately.
    display_ev_analysis(
        "Priya's Industry Conference (net of $400 cost)",
        [
            Outcome("High-value introduction leads to job offer", 0.15, 5000),
            Outcome("Useful contacts, some leads", 0.35, 800),
            Outcome("Minimal direct value", 0.50, 0),
        ]
    )
    # Subtract cost manually: gross EV = 0.15*5000 + 0.35*800 + 0.50*0 = 1030
    ev_before_cost = 0.15*5000 + 0.35*800 + 0.50*0
    print(f"Gross EV: ${ev_before_cost:.0f}, minus $400 cost = Net EV: ${ev_before_cost-400:.0f}")
    # Example 2: Dr. Yuki's poker flush draw
    flush_probability = 9 / 46  # 9 outs, 46 unseen cards
    pot_size = 200
    call_amount = 30
    display_ev_analysis(
        "Dr. Yuki's Flush Draw on the River",
        [
            Outcome("Flush completes (win pot)", flush_probability, pot_size),
            Outcome("Flush misses (lose call)", 1 - flush_probability, -call_amount),
        ]
    )
    # Example 3: Nadia's paid workshop application ($200 entry fee, $2000 prize)
    # Each outcome's value already nets out the $200 entry fee.
    display_ev_analysis(
        "Nadia's Content Creator Competition",
        [
            Outcome("Win ($2000 prize)", 0.08, 2000 - 200),
            Outcome("Top 10 (exposure value only)", 0.20, 300 - 200),
            Outcome("No placement", 0.72, -200),
        ]
    )
Chapter 11: Birthday Problem and Monty Hall
Simulation 11.1: Birthday Problem
import numpy as np
import matplotlib.pyplot as plt
def birthday_problem_simulation(max_group_size: int = 60, num_simulations: int = 10_000) -> None:
    """
    Estimate the probability of a shared birthday for different group sizes.

    The surprising result: only 23 people needed for >50% probability.

    Args:
        max_group_size: Largest group size to evaluate (curve runs from 2 to here).
        num_simulations: Number of random groups simulated per group size.
    """
    rng = np.random.default_rng(seed=777)
    group_sizes = list(range(2, max_group_size + 1))
    simulated_probs = []
    theoretical_probs = []
    for n in group_sizes:
        # Simulate num_simulations groups of size n (days numbered 1..365)
        birthdays = rng.integers(1, 366, size=(num_simulations, n))
        # FIX: vectorized duplicate detection replaces the original per-group
        # Python loop over np.unique. Sort each row; any equal adjacent pair
        # means at least one shared birthday. Same result, much faster.
        sorted_bdays = np.sort(birthdays, axis=1)
        has_shared = (np.diff(sorted_bdays, axis=1) == 0).any(axis=1)
        simulated_probs.append(float(np.mean(has_shared)))
        # Theoretical: P(shared) = 1 - P(no shared)
        # P(no shared) = 365/365 * 364/365 * ... * (365-n+1)/365
        p_no_shared = np.prod([(365 - i) / 365 for i in range(n)])
        theoretical_probs.append(1 - p_no_shared)
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(group_sizes, simulated_probs, 'steelblue', linewidth=1.5, label='Simulated')
    ax.plot(group_sizes, theoretical_probs, 'r--', linewidth=1.5, label='Theoretical')
    ax.axhline(y=0.5, color='gray', linestyle=':', linewidth=1)
    ax.axvline(x=23, color='green', linestyle=':', linewidth=1)
    ax.annotate('23 people -> 50.7%', xy=(23, 0.5), xytext=(30, 0.35),
                arrowprops=dict(arrowstyle='->'), fontsize=10, color='darkgreen')
    ax.set_xlabel('Group Size')
    ax.set_ylabel('P(at least one shared birthday)')
    ax.set_title('The Birthday Problem')
    ax.legend()
    plt.tight_layout()
    plt.savefig('birthday_problem.png', dpi=150)
    plt.show()
    print("Birthday Problem Key Values:")
    for n in [10, 20, 23, 30, 40, 50]:
        # FIX: only print checkpoints we actually computed; the original
        # raised IndexError when max_group_size < 50.
        if n <= max_group_size:
            idx = n - 2  # group_sizes starts at 2, so size n lives at index n-2
            print(f" {n} people: simulated={simulated_probs[idx]:.3f}, theoretical={theoretical_probs[idx]:.3f}")


if __name__ == "__main__":
    birthday_problem_simulation()
Simulation 11.2: Monty Hall Problem
import numpy as np
def monty_hall_simulation(num_games: int = 100_000) -> None:
"""
Simulate the Monty Hall problem.
STAY wins ~33% of the time. SWITCH wins ~67% of the time.
Counterintuitive because most people think it's 50-50 after Monty reveals.
It's not: the initial pick retains its 1/3 probability; the switch door
absorbs the remaining 2/3.
"""
rng = np.random.default_rng(seed=404)
car_locations = rng.integers(0, 3, size=num_games) # Where the car is
initial_picks = rng.integers(0, 3, size=num_games) # Player's first pick
stay_wins = 0
switch_wins = 0
for game in range(num_games):
car = car_locations[game]
pick = initial_picks[game]
# Monty opens a door: not the player's pick, not the car
can_open = [d for d in [0, 1, 2] if d != pick and d != car]
monty_opens = int(rng.choice(can_open))
# Stay strategy: player keeps original pick
if pick == car:
stay_wins += 1
# Switch strategy: player moves to the other closed door
switch_to = [d for d in [0, 1, 2] if d != pick and d != monty_opens][0]
if switch_to == car:
switch_wins += 1
print("Monty Hall Simulation Results")
print(f"Games: {num_games:,}")
print(f"STAY wins: {stay_wins:,} ({stay_wins/num_games:.1%}) [Theoretical: 33.3%]")
print(f"SWITCH wins: {switch_wins:,} ({switch_wins/num_games:.1%}) [Theoretical: 66.7%]")
print()
print("Conclusion: ALWAYS SWITCH. Switching is approximately twice as likely to win.")
if __name__ == "__main__":
monty_hall_simulation()
Expected output:
Monty Hall Simulation Results
Games: 100,000
STAY wins: 33,289 (33.3%) [Theoretical: 33.3%]
SWITCH wins: 66,711 (66.7%) [Theoretical: 66.7%]
Chapter 20: Network Graph with NetworkX
Simulation 20.1: Small-World vs. Random Network
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
def _average_path_length(G) -> float:
    """Average shortest path length of G; if G is disconnected (possible for
    the Erdos-Renyi graph), fall back to its largest connected component."""
    if nx.is_connected(G):
        return nx.average_shortest_path_length(G)
    largest_component = max(nx.connected_components(G), key=len)
    return nx.average_shortest_path_length(G.subgraph(largest_component))


def compare_network_topologies(num_nodes: int = 50, seed: int = 42) -> None:
    """
    Create and compare three network types to illustrate the Watts-Strogatz insight:
    - Regular lattice: high clustering, LONG paths
    - Small-world: high clustering, SHORT paths (achieved by a few random rewirings)
    - Random: low clustering, short paths

    The small-world network achieves what seems impossible: keeping communities tight
    while making everyone close to everyone else (like 'six degrees of separation').

    Args:
        num_nodes: Number of nodes in each generated network.
        seed: Seed shared by all three generators for reproducibility.
    """
    k_neighbors = 6  # Each node connects to k nearest neighbors in the lattice
    # 1. Regular ring lattice (no randomness: rewiring probability p = 0)
    lattice = nx.watts_strogatz_graph(n=num_nodes, k=k_neighbors, p=0.0, seed=seed)
    # 2. Small-world: rewire 5% of edges randomly (a tiny fraction)
    small_world = nx.watts_strogatz_graph(n=num_nodes, k=k_neighbors, p=0.05, seed=seed)
    # 3. Erdos-Renyi random graph with comparable edge density
    random_graph = nx.erdos_renyi_graph(n=num_nodes, p=k_neighbors/num_nodes, seed=seed)
    networks = {
        'Regular Lattice': lattice,
        'Small-World (p=0.05)': small_world,
        'Random Graph': random_graph,
    }
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    for ax, (title, G) in zip(axes, networks.items()):
        # REFACTOR: the connectivity check + largest-component fallback was
        # duplicated here and in the summary table below; both now share
        # the _average_path_length helper.
        apl = _average_path_length(G)
        cc = nx.average_clustering(G)
        pos = nx.circular_layout(G)
        nx.draw_networkx(G, pos=pos, ax=ax,
                         node_size=80, node_color='steelblue',
                         edge_color='gray', width=0.5,
                         with_labels=False, alpha=0.8)
        ax.set_title(f'{title}\nAvg path length: {apl:.2f} | Clustering: {cc:.3f}',
                     fontsize=10)
        ax.axis('off')
    plt.suptitle('Network Topologies: The Watts-Strogatz Insight', fontsize=13, fontweight='bold')
    plt.tight_layout()
    plt.savefig('network_topologies.png', dpi=150)
    plt.show()
    print("Network Comparison:")
    print(f"{'Type':<25} {'Avg Path Length':<20} {'Clustering Coeff':<20}")
    print("-" * 65)
    for title, G in networks.items():
        apl = _average_path_length(G)
        cc = nx.average_clustering(G)
        print(f"{title:<25} {apl:<20.3f} {cc:<20.3f}")
    print()
    print("Key insight: Small-world rewiring (just 5% of edges) dramatically")
    print("shortens path lengths while barely reducing clustering.")
    print("This explains six degrees of separation in large social networks.")


if __name__ == "__main__":
    compare_network_topologies()
Chapter 22: Viral Spread Simulation
Simulation 22.1: Content Virality Model
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
@dataclass
class ContentPost:
    """Represents a piece of content on a social media platform."""
    quality_score: float          # Intrinsic quality 0-1 (affects sharing rate)
    initial_reach: int            # Starting audience (current followers who see it)
    viral_coefficient: float      # Average shares per viewer (k-factor; k>1 means viral)
    platform_boost_chance: float  # Probability of algorithmic amplification per cycle
def simulate_virality(post: ContentPost, num_steps: int = 10, num_sims: int = 1000,
                      seed: int = 22) -> None:
    """
    Show that the same quality content produces wildly different reach outcomes
    due to random variation in early sharing. Early luck (who shares first) gets
    amplified by the platform algorithm.

    Viral coefficient (k):
    - k < 1: spread dies out (most posts)
    - k = 1: stable plateau
    - k > 1: exponential growth

    Args:
        post: Content parameters (starting reach, k-factor, boost chance).
        num_steps: Number of sharing cycles per simulation run.
        num_sims: Number of independent simulation runs.
        seed: RNG seed for reproducibility.
    """
    # NOTE(review): post.quality_score is never read in this function, even
    # though the ContentPost field comment says it affects the sharing rate —
    # confirm whether it was meant to scale viral_coefficient.
    rng = np.random.default_rng(seed=seed)
    # One row per simulation run; column 0 holds the starting reach
    all_trajectories = np.zeros((num_sims, num_steps + 1))
    all_trajectories[:, 0] = post.initial_reach
    for sim in range(num_sims):
        reach = float(post.initial_reach)
        for step in range(1, num_steps + 1):
            # New views = Poisson(reach * viral_coefficient)
            # Poisson captures randomness: sometimes more shares, sometimes fewer
            new_views = rng.poisson(lam=max(reach * post.viral_coefficient, 0))
            # Random platform boost (algorithm picks this content up)
            if rng.random() < post.platform_boost_chance:
                boost = rng.uniform(2, 10)
                new_views = int(new_views * boost)
            # Reach is REPLACED each cycle (views this cycle), not accumulated
            reach = new_views
            all_trajectories[sim, step] = reach
    peak_reaches = np.max(all_trajectories, axis=1)
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    # Spaghetti plot of trajectories (capped at 150 lines to keep it readable)
    time_steps = range(num_steps + 1)
    for sim in range(min(150, num_sims)):
        axes[0].plot(time_steps, all_trajectories[sim], alpha=0.08,
                     color='steelblue', linewidth=0.7)
    axes[0].plot(time_steps, np.median(all_trajectories, axis=0), 'red', linewidth=2,
                 label='Median')
    axes[0].plot(time_steps, np.percentile(all_trajectories, 95, axis=0), 'green',
                 linewidth=2, linestyle='--', label='95th percentile (viral)')
    axes[0].set_xlabel('Time steps (sharing cycles)')
    axes[0].set_ylabel('Content Reach')
    axes[0].set_title(f'Content Spread: {num_sims} Simulations\n'
                      f'Same content, different luck')
    axes[0].legend()
    # Distribution of peak reach on a log scale (+1 avoids log10(0))
    log_peaks = np.log10(peak_reaches + 1)
    axes[1].hist(log_peaks, bins=40, color='steelblue', alpha=0.8, edgecolor='black')
    for pct, label, color in [(50, 'Median', 'red'), (90, '90th pct', 'orange'),
                              (99, '99th pct', 'green')]:
        val = np.percentile(peak_reaches, pct)
        axes[1].axvline(x=np.log10(val+1), color=color, linestyle='--',
                        label=f'{label}: {val:,.0f}')
    axes[1].set_xlabel('Log10(Peak Reach)')
    axes[1].set_ylabel('Number of simulations')
    axes[1].set_title('Distribution of Peak Reach\n(power law tail visible on log scale)')
    axes[1].legend()
    plt.suptitle('The Luck of Virality: Same Content, Different Outcomes', fontsize=13)
    plt.tight_layout()
    plt.savefig('virality_simulation.png', dpi=150)
    plt.show()
    print("Virality Simulation Summary:")
    for pct in [25, 50, 75, 90, 95, 99]:
        print(f" {pct}th percentile peak reach: {np.percentile(peak_reaches, pct):>10,.0f}")
    print(f"\n Mean vs. Median: {np.mean(peak_reaches):,.0f} vs. {np.median(peak_reaches):,.0f}")
    print(" Large gap = power law distribution driven by rare viral outliers.")


if __name__ == "__main__":
    # Nadia's content with moderate quality and sub-viral coefficient
    # (k = 0.15 < 1, so most runs die out; rare boosts create the outliers)
    nadia_post = ContentPost(
        quality_score=0.70,
        initial_reach=500,
        viral_coefficient=0.15,
        platform_boost_chance=0.03
    )
    simulate_virality(nadia_post, num_sims=2000)
Chapter 25: Opportunity Surface Calculator
Simulation 25.1: Opportunity Surface Scoring
from dataclasses import dataclass
from typing import List
@dataclass
class Context:
    """A context (physical or digital environment) you inhabit."""
    name: str
    frequency_per_month: int     # Sessions per month
    new_people_per_session: int  # Average new people encountered
    diversity_score: float       # How different are these people? 0-1
    engagement_depth: float      # Active participation depth 0-1


def opportunity_surface_score(contexts: List[Context]) -> None:
    """
    Calculate and display opportunity surface across all inhabited contexts.

    Opportunity Surface = sum of (frequency x new_people x diversity x engagement)

    All four factors multiply because missing any one collapses the score:
    - Frequency: can't benefit from a context you don't visit
    - New people: require new people for lucky encounters
    - Diversity: need different-circle people for weak tie benefit
    - Engagement: passive consumption doesn't create serendipity

    Args:
        contexts: Contexts to audit. May be empty or score zero everywhere.
    """
    print("Opportunity Surface Audit")
    print("=" * 70)
    print(f"{'Context':<22} {'Monthly':<10} {'Diversity':<10} {'Engage':<10} {'Score':<10}")
    print("-" * 70)
    scores = []
    total = 0.0
    for ctx in contexts:
        monthly_encounters = ctx.frequency_per_month * ctx.new_people_per_session
        score = monthly_encounters * ctx.diversity_score * ctx.engagement_depth
        scores.append((ctx.name, score))
        total += score
        print(f"{ctx.name:<22} {monthly_encounters:<10,} {ctx.diversity_score:<10.2f} "
              f"{ctx.engagement_depth:<10.2f} {score:<10.1f}")
    print("-" * 70)
    print(f"{'TOTAL OPPORTUNITY SURFACE':<52} {total:<10.1f}")
    print()
    scores_sorted = sorted(scores, key=lambda x: x[1], reverse=True)
    print("Top three opportunity sources:")
    for rank, (name, score) in enumerate(scores_sorted[:3], 1):
        # FIX: guard against division by zero. A total of 0.0 is possible
        # (e.g. every context has new_people_per_session == 0, as in the
        # 'Priya before' example), and the original crashed here.
        pct = (score / total * 100) if total else 0.0
        print(f" {rank}. {name}: {score:.1f} points ({pct:.0f}% of total)")
    print()
    # Tier thresholds for the summary verdict
    if total < 100:
        print("Score: LOW (<100). Add at least one high-diversity context immediately.")
    elif total < 500:
        print("Score: MODERATE (100-500). Increase engagement depth in top contexts.")
    elif total < 1500:
        print("Score: GOOD (500-1500). Maintain consistency and deepen new contacts.")
    else:
        print("Score: EXCELLENT (1500+). Focus quality over quantity.")
if __name__ == "__main__":
    # Priya before luck science: isolated, passive contexts with no new people
    priya_before = [
        Context("Home (isolated)", 30, 0, 0.0, 0.1),
        Context("LinkedIn (passive)", 20, 0, 0.3, 0.1),
        Context("Close friend group", 8, 0, 0.1, 0.8),
        Context("Online job boards", 20, 0, 0.4, 0.2),
    ]
    print("PRIYA -- Before:")
    opportunity_surface_score(priya_before)
    print("\n" + "="*70 + "\n")
    # Priya after applying luck architecture: frequent, diverse, engaged contexts
    priya_after = [
        Context("Industry meetups (4x/mo)", 4, 15, 0.8, 0.9),
        Context("LinkedIn (active)", 20, 3, 0.7, 0.7),
        Context("Professional Slack", 20, 2, 0.7, 0.8),
        Context("Industry conference", 1, 50, 0.9, 0.9),
        Context("1-on-1 coffee meetings", 4, 1, 0.8, 1.0),
        Context("Online course community", 8, 2, 0.6, 0.7),
        Context("Close friend group", 8, 1, 0.2, 0.8),
    ]
    print("PRIYA -- After Luck Architecture:")
    opportunity_surface_score(priya_after)
Chapter 34: Social Media Luck Metrics Tracker
Simulation 34.1: Content Performance Tracker
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
def generate_content_performance_data(num_posts: int = 60, seed: int = 34) -> pd.DataFrame:
    """
    Build a synthetic content-performance dataset (Nadia's 60-post example).

    Each post's views mix a slowly improving quality signal with heavy
    lognormal noise, plus a rare (5%) algorithmic boost that produces
    outlier results.
    """
    rng = np.random.default_rng(seed=seed)
    # One post per day starting 2025-01-01
    start_date = datetime(2025, 1, 1)
    dates = [start_date + timedelta(days=day_offset) for day_offset in range(num_posts)]
    # Quality improves linearly over the posting period (Nadia gets better)
    base_quality = np.linspace(0.4, 0.7, num_posts)
    base_reach = 500
    # Heavy-tailed per-post noise around the quality signal
    noise_multiplier = rng.lognormal(mean=0, sigma=0.8, size=num_posts)
    # ~5% of posts get an algorithmic boost of 3x-10x; everyone else gets 1x
    boost_lottery = rng.random(num_posts)
    boost_size = rng.uniform(3, 10, num_posts)
    algorithmic_boost = np.where(boost_lottery < 0.05, boost_size, 1.0)
    views = (base_quality * base_reach * noise_multiplier * algorithmic_boost).astype(int)
    # Engagement counts are random fractions of views
    likes = (views * rng.uniform(0.03, 0.08, num_posts)).astype(int)
    shares = (views * rng.uniform(0.005, 0.02, num_posts)).astype(int)
    comments = (views * rng.uniform(0.002, 0.01, num_posts)).astype(int)
    # Post type categories
    post_types = rng.choice(['Tutorial', 'Story', 'Opinion', 'Behind-the-scenes'],
                            size=num_posts, p=[0.3, 0.3, 0.2, 0.2])
    return pd.DataFrame({
        'date': dates,
        'post_type': post_types,
        'views': views,
        'likes': likes,
        'shares': shares,
        'comments': comments,
        'engagement_rate': (likes + shares + comments) / np.maximum(views, 1),
    })
def analyze_content_luck(df: pd.DataFrame) -> None:
    """
    Analyze content performance data to distinguish signal from noise.

    Applies survivorship-bias awareness and regression-to-the-mean concepts:
    prints summary statistics, per-type performance, and a regression-to-the-
    mean check, then saves a four-panel diagnostic figure to
    'content_analysis.png'.

    Args:
        df: DataFrame with columns 'date', 'post_type', 'views', 'likes',
            'shares', 'comments', 'engagement_rate' (as produced by
            generate_content_performance_data).
    """
    print("Content Performance Analysis: Nadia's 60-Post Dataset")
    print("=" * 60)
    print()
    # Overall statistics
    print("Overall Statistics:")
    print(f" Median views: {df['views'].median():,.0f}")
    print(f" Mean views: {df['views'].mean():,.0f} (higher than median = skewed by outliers)")
    print(f" Max views: {df['views'].max():,.0f}")
    print(f" % posts over 1,000 views: {(df['views'] > 1000).mean():.1%}")
    print()
    # Performance by post type
    print("Performance by Post Type:")
    type_stats = df.groupby('post_type')['views'].agg(['median', 'mean', 'count'])
    type_stats.columns = ['Median Views', 'Mean Views', 'Count']
    print(type_stats.to_string())
    print()
    # Regression to the mean: do top-performing posts predict the next post?
    df_sorted = df.sort_values('date').reset_index(drop=True)
    top_posts = df_sorted[df_sorted['views'] > df_sorted['views'].quantile(0.90)]
    # BUG FIX: the original used .loc[top_posts.index + 1], which raises
    # KeyError whenever a top-10% post is the final post (its index + 1 is
    # out of range). .reindex returns NaN for missing labels instead, and
    # dropna() then excludes them from the average.
    next_views = df_sorted['views'].reindex(top_posts.index + 1).dropna()
    print("Regression to the Mean Check:")
    print(f" Top 10% posts average views: {top_posts['views'].mean():,.0f}")
    print(f" Those posts' NEXT post average views: "
          f"{next_views.mean():,.0f}")
    print(" (Lower next-post average = regression toward the mean)")
    print()
    # Visualize
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    # Views over time with trend line
    axes[0, 0].scatter(range(len(df)), df['views'], alpha=0.6, color='steelblue', s=30)
    # Simple linear trend line (degree-1 polynomial fit)
    z = np.polyfit(range(len(df)), df['views'], 1)
    p = np.poly1d(z)
    axes[0, 0].plot(range(len(df)), p(range(len(df))), 'r--', linewidth=2, label='Trend')
    axes[0, 0].set_title('Views Over Time\n(Noise obscures the upward trend)')
    axes[0, 0].set_xlabel('Post Number')
    axes[0, 0].set_ylabel('Views')
    axes[0, 0].legend()
    # Distribution of views (shows power-law-like tail)
    axes[0, 1].hist(df['views'], bins=20, color='steelblue', alpha=0.8, edgecolor='black')
    axes[0, 1].axvline(df['views'].median(), color='red', linestyle='--', label='Median')
    axes[0, 1].axvline(df['views'].mean(), color='orange', linestyle='--', label='Mean')
    axes[0, 1].set_title('Views Distribution\n(Right-skewed: most posts low, few viral)')
    axes[0, 1].set_xlabel('Views')
    axes[0, 1].legend()
    # Views by post type (box plots).
    # NOTE: boxplot's `labels=` kwarg is deprecated in newer matplotlib
    # (renamed tick_labels); setting tick labels explicitly works everywhere.
    post_types = df['post_type'].unique()
    views_by_type = [df[df['post_type'] == pt]['views'].values for pt in post_types]
    axes[1, 0].boxplot(views_by_type)
    axes[1, 0].set_xticklabels(post_types)
    axes[1, 0].set_title('Views by Post Type')
    axes[1, 0].set_ylabel('Views')
    # Engagement rate trend, smoothed with a 7-post rolling mean
    rolling_engagement = df['engagement_rate'].rolling(window=7, min_periods=1).mean()
    axes[1, 1].plot(range(len(df)), df['engagement_rate'], alpha=0.4, color='steelblue')
    axes[1, 1].plot(range(len(df)), rolling_engagement, 'red', linewidth=2,
                    label='7-post rolling average')
    axes[1, 1].set_title('Engagement Rate Over Time')
    axes[1, 1].set_xlabel('Post Number')
    axes[1, 1].set_ylabel('Engagement Rate')
    axes[1, 1].legend()
    plt.suptitle("Nadia's Content Luck Analysis: Signal vs. Noise", fontsize=13)
    plt.tight_layout()
    plt.savefig('content_analysis.png', dpi=150)
    plt.show()
if __name__ == "__main__":
    # Build Nadia's simulated 60-post dataset, then run the full analysis.
    performance_data = generate_content_performance_data()
    analyze_content_luck(performance_data)
Chapter 36: Luck Audit Radar Chart
Simulation 36.1: Luck Audit Visualizer
import numpy as np
import matplotlib.pyplot as plt
def visualize_luck_audit(scores: dict, title: str = "Luck Audit") -> None:
    """
    Display luck audit scores as a radar (spider) chart and print a summary.

    Saves the chart to a PNG named after the title, then prints the total
    score plus the lowest-scoring (highest-leverage) and strongest domains.

    Args:
        scores: Dict mapping domain names to scores 1-10
        title: Chart title; also lowercased/underscored to form the filename

    Note:
        The maximum total is now computed as 10 * number of domains
        (the original hard-coded "/70", which was only correct for
        exactly seven domains).
    """
    domains = list(scores.keys())
    values = list(scores.values())
    n = len(domains)
    # Generalized maximum: 10 points per domain (was a hard-coded 70)
    max_total = 10 * n
    # Angles for each domain (evenly spaced around the circle)
    angles = np.linspace(0, 2 * np.pi, n, endpoint=False).tolist()
    # Close the polygon by repeating the first value
    values_plot = values + [values[0]]
    angles_plot = angles + [angles[0]]
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    # User's scores
    ax.fill(angles_plot, values_plot, alpha=0.25, color='steelblue')
    ax.plot(angles_plot, values_plot, 'o-', color='steelblue', linewidth=2, label='Your scores')
    # Target line (score = 7 = "good")
    target = [7] * (n + 1)
    ax.plot(angles_plot, target, '--', color='green', alpha=0.5, linewidth=1.5,
            label='Good target (7/10)')
    # Max line
    max_line = [10] * (n + 1)
    ax.plot(angles_plot, max_line, ':', color='gray', alpha=0.3)
    ax.set_xticks(angles)
    ax.set_xticklabels(domains, fontsize=10)
    ax.set_ylim(0, 10)
    ax.set_yticks([2, 4, 6, 8, 10])
    ax.grid(color='gray', alpha=0.3)
    ax.set_title(f'{title}\nTotal: {sum(values)}/{max_total}',
                 fontsize=13, fontweight='bold', pad=20)
    ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
    plt.tight_layout()
    filename = title.lower().replace(' ', '_').replace(':', '') + '.png'
    plt.savefig(filename, dpi=150, bbox_inches='tight')
    plt.show()
    # Text summary: lowest scores are the highest-leverage improvement targets
    total = sum(values)
    sorted_domains = sorted(scores.items(), key=lambda x: x[1])
    print(f"\n{title}: Total {total}/{max_total}")
    print(f"Highest leverage (lowest scores): {', '.join(d for d, _ in sorted_domains[:2])}")
    print(f"Strongest areas: {', '.join(d for d, _ in sorted_domains[-2:])}")
if __name__ == "__main__":
    # Priya's audit trajectory: the same seven domains, scored before and
    # after six months of applying luck-architecture practices.
    audit_domains = [
        "Network Quality",
        "Opportunity Surface",
        "Mindset",
        "Skills",
        "Attention Quality",
        "Timing Awareness",
        "Resilience",
    ]
    before_scores = [3, 4, 5, 6, 5, 4, 6]
    after_scores = [7, 8, 7, 7, 7, 6, 8]
    visualize_luck_audit(dict(zip(audit_domains, before_scores)),
                         "Priya: Before Luck Science")
    visualize_luck_audit(dict(zip(audit_domains, after_scores)),
                         "Priya: Six Months Later")
Chapter 40: Personal Luck Strategy Planner
Simulation 40.1: 90-Day Action Priority Calculator
from dataclasses import dataclass
from typing import List
@dataclass
class LuckAction:
    """A specific luck-architecture action to implement."""
    # Short human-readable statement of the concrete task
    description: str
    # Luck domain the action improves (e.g. "Network", "Skills", "Mindset")
    domain: str
    effort_required: int  # 1-10 scale
    expected_impact: int  # 1-10 scale
    timeline_days: int  # Days until results visible
    # Whether the action can be cheaply undone (used as a scoring bonus)
    reversible: bool
def priority_score(action: LuckAction) -> float:
    """
    Rank an action for scheduling; a higher score means higher priority.

    Score = (impact / effort) * reversibility_bonus * urgency_factor.
    Effort is clamped to at least 1 to avoid division by zero.
    """
    payoff_per_effort = action.expected_impact / max(action.effort_required, 1)
    # Reversible actions are slightly preferred: cheap to abandon if wrong
    bonus = 1.1 if action.reversible else 1.0
    # Sooner-visible results earn a larger urgency multiplier
    if action.timeline_days < 30:
        urgency = 1.3
    elif action.timeline_days < 60:
        urgency = 1.1
    else:
        urgency = 1.0
    return payoff_per_effort * bonus * urgency
def generate_90_day_plan(actions: List[LuckAction], person_name: str = "You") -> None:
    """
    Print a prioritized 90-day luck action plan in three 30-day phases.

    Actions are scored with priority_score and slotted into the phase whose
    day range contains their timeline_days; each phase lists at most three
    actions, highest priority first.

    BUG FIX: the original fallback (used when a phase had no timeline match)
    re-printed the overall top three actions, duplicating actions already
    scheduled in earlier phases. Scheduled actions are now tracked, and an
    empty phase is filled with the highest-priority actions not yet used.

    Args:
        actions: Candidate actions to schedule
        person_name: Label printed in the plan header
    """
    scored = sorted(((a, priority_score(a)) for a in actions),
                    key=lambda pair: pair[1], reverse=True)
    scheduled = set()  # id()s of actions already placed in an earlier phase
    print(f"\n90-Day Luck Activation Plan: {person_name}")
    print("=" * 65)
    for phase_label, day_range in [("PHASE 1 (Days 1-30)", (0, 30)),
                                   ("PHASE 2 (Days 31-60)", (30, 60)),
                                   ("PHASE 3 (Days 61-90)", (60, 91))]:
        phase_actions = [(a, s) for a, s in scored
                         if day_range[0] < a.timeline_days <= day_range[1]
                         and id(a) not in scheduled][:3]
        if not phase_actions:
            # No timeline match: fill with the best actions not yet scheduled
            phase_actions = [(a, s) for a, s in scored
                             if id(a) not in scheduled][:3]
        for a, _ in phase_actions:
            scheduled.add(id(a))
        print(f"\n{phase_label}:")
        print("-" * 65)
        for rank, (action, score) in enumerate(phase_actions, 1):
            print(f" {rank}. [{action.domain}] {action.description}")
            print(f" Impact {action.expected_impact}/10 | Effort {action.effort_required}/10 "
                  f"| Results in {action.timeline_days} days | Priority: {score:.2f}")
if __name__ == "__main__":
    # Nadia's 90-day plan at Chapter 40. Each row is
    # (description, domain, effort, impact, timeline_days, reversible).
    action_rows = [
        ("Start luck journal — daily for 30 days",
         "Mindset", 2, 7, 30, True),
        ("Attend one creator meetup/event",
         "Opportunity Surface", 3, 7, 14, True),
        ("Reactivate 5 creator weak ties with genuine messages",
         "Network", 2, 6, 14, True),
        ("Post first 'behind the scenes' serendipity hook post",
         "Opportunity Surface", 3, 6, 7, True),
        ("Join one niche community in her content area",
         "Network", 2, 7, 30, True),
        ("Deep dive course: platform algorithm mechanics",
         "Skills", 5, 8, 45, True),
        ("Establish weekly content + data review routine",
         "Attention Quality", 3, 7, 30, True),
        ("Build editorial content calendar for 60 days",
         "Timing Awareness", 4, 6, 45, True),
        ("Pitch one collaboration to mid-sized creator",
         "Network", 5, 8, 45, True),
    ]
    generate_90_day_plan([LuckAction(*row) for row in action_rows], "Nadia")
Quick Start: Dependency Verification
"""
verify_setup.py — Run this first to confirm all dependencies are installed.
Usage:
python verify_setup.py
"""
import importlib
import sys
REQUIRED = [
"numpy",
"matplotlib",
"scipy",
"networkx",
"pandas",
"seaborn",
]
def verify() -> None:
    """
    Print the Python version and the install status of every REQUIRED package.

    Ends with either a ready-to-run confirmation or the single pip command
    that installs all missing dependencies.
    """
    print(f"Python {sys.version}")
    if sys.version_info < (3, 10):
        print("WARNING: Python 3.10+ required.")
    else:
        print("Python version: OK")
    print()
    # Collect the names that fail to import rather than tracking a flag
    missing = []
    for pkg in REQUIRED:
        try:
            mod = importlib.import_module(pkg)
        except ImportError:
            print(f" MISSING {pkg} -- install: pip install {pkg}")
            missing.append(pkg)
        else:
            version = getattr(mod, "__version__", "unknown")
            print(f" OK {pkg} ({version})")
    print()
    if not missing:
        print("All dependencies installed. Ready to run simulations.")
    else:
        print("Install all missing packages:")
        print(" pip install numpy matplotlib scipy networkx pandas seaborn")
if __name__ == "__main__":
    # Entry point: run the dependency check when executed as a script.
    verify()
All code in this appendix is provided for educational use. You are encouraged to modify parameters, combine simulations, and build extensions. The best way to develop genuine probability intuition is to change the numbers, re-run the code, and ask: "Does the output match what I predicted?" When it doesn't, you've found something worth understanding.
For questions about the code or extensions, visit the companion repository linked from the textbook website.