Appendix C: Python and API Reference

This appendix serves as a quick-reference guide for the Python libraries, language features, and prediction market APIs used throughout this book. It is not a substitute for the official documentation of each library, but rather a curated selection of the functions and patterns that appear most frequently in the code examples.


C.1 Python Quick Reference

Core Data Structures

# Lists — ordered, mutable sequences
prices = [0.45, 0.52, 0.61, 0.58]
prices.append(0.63)          # Add to end
prices.insert(0, 0.40)      # Insert at index 0
last = prices.pop()         # Remove and return last element
prices.sort()               # Sort in place
prices_sorted = sorted(prices)  # Return new sorted list

# Dictionaries — key-value mappings
market = {
    "question": "Will event X occur?",
    "price": 0.62,
    "volume": 15000,
    "close_date": "2026-12-31"
}
market["price"]              # Access by key
market.get("category", "Unknown")  # Access with default
market.keys()                # All keys
market.values()              # All values
market.items()               # Key-value pairs

# Sets — unordered collections of unique elements
categories = {"politics", "economics", "sports"}
categories.add("technology")
overlap = categories & other_set   # Intersection
combined = categories | other_set  # Union

# Tuples — ordered, immutable sequences
coordinates = (0.65, 0.35)   # Often used for (yes_prob, no_prob)

List Comprehensions and Generator Expressions

# List comprehension
returns = [(sell - buy) / buy for buy, sell in zip(buy_prices, sell_prices)]

# Filtered list comprehension
profitable = [r for r in returns if r > 0]

# Dictionary comprehension
price_map = {m["id"]: m["price"] for m in markets}

# Generator expression (memory-efficient for large datasets)
total_volume = sum(m["volume"] for m in markets)

# Nested comprehension
all_prices = [p for market in markets for p in market["price_history"]]
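# Equivalent nested loops (the comprehension reads left to right in the same order):
all_prices = []
for market in markets:
    for p in market["price_history"]:
        all_prices.append(p)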

Lambda Functions and Functional Tools

# Lambda: anonymous single-expression functions
sort_by_volume = sorted(markets, key=lambda m: m["volume"], reverse=True)

# map: apply function to each element
import math
log_odds = list(map(lambda p: math.log(p / (1 - p)), probabilities))

# filter: select elements matching condition
active = list(filter(lambda m: m["status"] == "open", markets))

# zip: pair elements from multiple iterables
for date, price, volume in zip(dates, prices, volumes):
    print(f"{date}: ${price:.2f} ({volume} contracts)")

Common Patterns Used in This Book

# Unpacking
mean, std = np.mean(data), np.std(data)

# Ternary expression
label = "overpriced" if model_prob < market_price else "underpriced"

# Dictionary as switch
score_fn = {
    "brier": lambda p, o: (p - o) ** 2,
    "log": lambda p, o: -np.log(p if o == 1 else 1 - p),
}
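brier = score_fn["brier"](0.7, 1)   # Dispatch by key: (0.7 - 1) ** 2 = 0.09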

# Context managers for file I/O
import json

with open("market_data.json", "r") as f:
    data = json.load(f)

# f-strings for formatted output
print(f"Market: {name}, Price: {price:.4f}, Edge: {edge:+.2%}")

# try/except for API calls
import requests

try:
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    data = response.json()
except requests.RequestException as e:
    print(f"API error: {e}")

C.2 NumPy Reference

Array Creation and Manipulation

import numpy as np

# Creation
a = np.array([0.45, 0.52, 0.61])       # From list
zeros = np.zeros(100)                     # 100 zeros
ones = np.ones((3, 4))                   # 3x4 matrix of ones
grid = np.arange(0, 1, 0.01)            # [0.00, 0.01, ..., 0.99]
lin = np.linspace(0, 1, 101)            # 101 points from 0 to 1
eye = np.eye(3)                          # 3x3 identity matrix

# Reshaping
a.reshape(3, 1)                          # Column vector
a.flatten()                              # 1D array
np.expand_dims(a, axis=0)               # Add dimension

# Indexing and slicing
a[0]                                     # First element
a[-1]                                    # Last element
a[1:3]                                   # Elements at index 1 and 2
a[a > 0.5]                              # Boolean indexing (filter)
np.where(a > 0.5, a, 0)                 # Conditional replacement

Mathematical Operations

# Element-wise arithmetic
returns = (sell_prices - buy_prices) / buy_prices
log_prices = np.log(prices)
exp_values = np.exp(log_odds)

# Aggregations
np.mean(returns)                         # Mean
np.std(returns, ddof=1)                 # Sample standard deviation
np.var(returns, ddof=1)                 # Sample variance
np.median(returns)                       # Median
np.percentile(returns, [25, 50, 75])    # Quartiles
np.sum(weights * returns)               # Weighted sum
np.cumsum(daily_returns)                # Cumulative sum
np.cumprod(1 + daily_returns)           # Cumulative product (wealth path)

# Statistical functions
np.corrcoef(x, y)                       # Correlation matrix
np.cov(x, y)                            # Covariance matrix
np.histogram(prices, bins=20)           # Histogram counts and bin edges

Linear Algebra

# Matrix operations
A = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])

np.dot(A, b)                            # Matrix-vector product
A @ b                                    # Same as np.dot (Python 3.5+)
np.linalg.inv(A)                        # Matrix inverse
np.linalg.det(A)                        # Determinant
eigenvalues, eigenvectors = np.linalg.eig(A)  # Eigendecomposition
np.linalg.solve(A, b)                   # Solve Ax = b
np.linalg.norm(b)                       # Euclidean norm

Random Number Generation

rng = np.random.default_rng(seed=42)     # Reproducible random generator

rng.random(1000)                         # Uniform [0, 1)
rng.normal(0, 1, 1000)                  # Standard normal samples
rng.binomial(n=100, p=0.6, size=1000)   # Binomial samples
rng.beta(a=2, b=5, size=1000)           # Beta samples
rng.choice(markets, size=50, replace=False)  # Random sample without replacement
rng.shuffle(data)                        # Shuffle in place

C.3 Pandas Reference

DataFrame Creation and I/O

import pandas as pd

# Creation
df = pd.DataFrame({
    "date": pd.date_range("2025-01-01", periods=100),
    "price": np.random.uniform(0.3, 0.7, 100),
    "volume": np.random.randint(100, 5000, 100)
})

# File I/O
df = pd.read_csv("market_data.csv", parse_dates=["date"])
df = pd.read_json("market_data.json")
df.to_csv("output.csv", index=False)
df.to_parquet("output.parquet")          # Fast binary format

Selection and Filtering

# Column selection
df["price"]                              # Single column (Series)
df[["price", "volume"]]                 # Multiple columns (DataFrame)

# Row selection
df.loc[0]                                # By label
df.iloc[0]                               # By position
df.loc[df["price"] > 0.5]              # Boolean filter
df.query("price > 0.5 and volume > 1000")  # Query string

# Setting values
df.loc[df["price"] > 0.9, "flag"] = "high_confidence"

Common Transformations

# Sorting
df.sort_values("date", ascending=True)
df.sort_values(["volume", "price"], ascending=[False, True])

# New columns
df["log_odds"] = np.log(df["price"] / (1 - df["price"]))
df["return"] = df["price"].pct_change()
df["rolling_avg"] = df["price"].rolling(window=7).mean()
df["cumulative_return"] = (1 + df["return"]).cumprod()

# Apply custom functions
df["edge"] = df.apply(lambda row: model_predict(row) - row["price"], axis=1)

# Handling missing values
df.dropna(subset=["price"])              # Drop rows with NaN price
df["volume"].fillna(0, inplace=True)     # Fill NaN with 0
df.interpolate(method="time")           # Time-based interpolation

GroupBy and Aggregation

# Group by category
grouped = df.groupby("category")
grouped["price"].mean()                  # Mean price per category
grouped.agg({"price": ["mean", "std"], "volume": "sum"})  # Multiple aggregations

# Pivot tables
pd.pivot_table(df, values="return", index="month", columns="category", aggfunc="mean")

# Resampling time series
daily = df.set_index("date").resample("D").agg({"price": "last", "volume": "sum"})
weekly = daily.resample("W").mean()

Merging and Joining

# Merge on common column
combined = pd.merge(market_df, poll_df, on="question_id", how="left")

# Concatenate DataFrames
all_data = pd.concat([df_2024, df_2025], ignore_index=True)

# Join on index
result = market_prices.join(model_predictions, how="inner")

C.4 Matplotlib / Seaborn Reference

Basic Plot Setup

import matplotlib.pyplot as plt
import seaborn as sns

# Style configuration used throughout this book
plt.style.use("seaborn-v0_8-whitegrid")
sns.set_palette("colorblind")
plt.rcParams["figure.figsize"] = (10, 6)
plt.rcParams["font.size"] = 12

Time Series Plots (Market Price Histories)

fig, ax = plt.subplots()
ax.plot(dates, prices, linewidth=1.5, label="Market Price")
ax.axhline(y=0.5, color="gray", linestyle="--", alpha=0.5, label="50% threshold")
ax.fill_between(dates, lower_bound, upper_bound, alpha=0.2, label="95% CI")
ax.set_xlabel("Date")
ax.set_ylabel("Implied Probability")
ax.set_title("Market Price Over Time")
ax.legend()
ax.set_ylim(0, 1)
fig.autofmt_xdate()
plt.tight_layout()
plt.savefig("price_history.png", dpi=150, bbox_inches="tight")
plt.show()

Calibration Plots

fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1], "k--", label="Perfect calibration")
ax.scatter(predicted_probs, observed_freqs, s=sizes, alpha=0.7, label="Model")
ax.set_xlabel("Predicted Probability")
ax.set_ylabel("Observed Frequency")
ax.set_title("Calibration Plot")
ax.legend()
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_aspect("equal")

Distribution Plots

# Histogram with KDE
fig, ax = plt.subplots()
sns.histplot(returns, bins=50, kde=True, stat="density", ax=ax)
ax.axvline(x=0, color="red", linestyle="--", label="Break-even")
ax.set_xlabel("Return per Trade")
ax.set_title("Distribution of Returns")

# Box plot comparing strategies
fig, ax = plt.subplots()
sns.boxplot(data=results_df, x="strategy", y="return", ax=ax)
ax.set_title("Return Distribution by Strategy")

Heatmaps (Correlation Matrices, Kelly Tables)

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="RdBu_r",
            center=0, vmin=-1, vmax=1, square=True, ax=ax)
ax.set_title("Market Correlation Matrix")

Bar Charts (Brier Scores, Model Comparison)

fig, ax = plt.subplots()
models = ["Baseline", "Logistic", "Random Forest", "Ensemble", "Market"]
scores = [0.250, 0.185, 0.172, 0.165, 0.170]
colors = ["gray"] + ["steelblue"] * 3 + ["coral"]
ax.barh(models, scores, color=colors)
ax.set_xlabel("Mean Brier Score (lower is better)")
ax.set_title("Model Comparison")
ax.invert_yaxis()

Scatter Plots (Price vs. Feature, Model vs. Market)

fig, ax = plt.subplots()
ax.scatter(model_probs, market_prices, alpha=0.4, s=20)
ax.plot([0, 1], [0, 1], "r--", label="Agreement line")
ax.set_xlabel("Model Probability")
ax.set_ylabel("Market Price")
ax.set_title("Model vs. Market")
ax.legend()

C.5 scikit-learn Reference

Classification Models

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Logistic regression pipeline
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", LogisticRegression(C=1.0, max_iter=1000))
])
pipe.fit(X_train, y_train)
probs = pipe.predict_proba(X_test)[:, 1]   # Probability of positive class

# Random forest
rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)
rf.fit(X_train, y_train)
rf_probs = rf.predict_proba(X_test)[:, 1]

# Gradient boosting
gb = GradientBoostingClassifier(n_estimators=200, learning_rate=0.1, max_depth=4)
gb.fit(X_train, y_train)
gb_probs = gb.predict_proba(X_test)[:, 1]

# Cross-validation
scores = cross_val_score(pipe, X, y, cv=5, scoring="neg_brier_score")
print(f"Mean Brier: {-scores.mean():.4f} +/- {scores.std():.4f}")

Probability Calibration

from sklearn.calibration import CalibratedClassifierCV, calibration_curve

# Platt scaling (sigmoid) or isotonic calibration
calibrated = CalibratedClassifierCV(rf, method="sigmoid", cv=5)
calibrated.fit(X_train, y_train)
calibrated_probs = calibrated.predict_proba(X_test)[:, 1]

# Calibration curve for evaluation
fraction_pos, mean_predicted = calibration_curve(y_test, probs, n_bins=10)
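# mean_predicted (x-axis) vs. fraction_pos (y-axis) feeds the calibration plot in C.4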

Evaluation Metrics

from sklearn.metrics import (
    brier_score_loss, log_loss, roc_auc_score,
    accuracy_score, classification_report, confusion_matrix
)

# Scoring metrics (lower is better for brier and log loss)
brier = brier_score_loss(y_test, probs)
logloss = log_loss(y_test, probs)
auc = roc_auc_score(y_test, probs)

# Classification report
print(classification_report(y_test, (probs > 0.5).astype(int)))
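# Confusion matrix and accuracy at a 0.5 decision threshold
preds = (probs > 0.5).astype(int)
print(confusion_matrix(y_test, preds))
print(f"Accuracy: {accuracy_score(y_test, preds):.3f}")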

Feature Importance

# From tree-based models
importances = rf.feature_importances_
feature_ranking = sorted(zip(feature_names, importances), key=lambda x: -x[1])

# Permutation importance (model-agnostic)
from sklearn.inspection import permutation_importance
result = permutation_importance(rf, X_test, y_test, n_repeats=10, random_state=42)

C.6 Prediction Market API Reference

Polymarket API

Polymarket uses a CLOB (Central Limit Order Book) model. The API is REST-based with JSON responses.

import requests

BASE_URL = "https://clob.polymarket.com"

# Get list of active markets
response = requests.get(f"{BASE_URL}/markets", params={"active": True, "limit": 50})
markets = response.json()

# Example response structure for a single market:
# {
#     "condition_id": "0x1234abcd...",
#     "question": "Will X happen by December 2026?",
#     "tokens": [
#         {"token_id": "123", "outcome": "Yes", "price": 0.62},
#         {"token_id": "456", "outcome": "No", "price": 0.38}
#     ],
#     "end_date_iso": "2026-12-31T00:00:00Z",
#     "volume": "1500000",
#     "active": true
# }

# Get order book for a specific token
response = requests.get(f"{BASE_URL}/book", params={"token_id": "123"})
order_book = response.json()
# Returns bids and asks with price levels and sizes
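# The response schema below is assumed; verify field names against the Polymarket
# docs. A minimal sketch for extracting the best bid/ask and midpoint:
bids = order_book.get("bids", [])
asks = order_book.get("asks", [])
if bids and asks:
    best_bid = max(float(level["price"]) for level in bids)
    best_ask = min(float(level["price"]) for level in asks)
    midpoint = (best_bid + best_ask) / 2   # Rough implied-probability estimate
    spread = best_ask - best_bid           # Simple liquidity proxy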

# Get price history
response = requests.get(f"{BASE_URL}/prices-history",
    params={"market": "0x1234abcd...", "interval": "1d", "fidelity": 60}
)
price_history = response.json()

Kalshi API

Kalshi is a CFTC-regulated exchange. API access requires authentication.

BASE_URL = "https://trading-api.kalshi.com/trade-api/v2"

# Authentication
session = requests.Session()
login_response = session.post(f"{BASE_URL}/login",
    json={"email": "user@example.com", "password": "password"}
)
token = login_response.json()["token"]
session.headers.update({"Authorization": f"Bearer {token}"})

# Get events (groups of related markets)
response = session.get(f"{BASE_URL}/events", params={"status": "open", "limit": 50})
events = response.json()["events"]

# Get markets within an event
response = session.get(f"{BASE_URL}/markets",
    params={"event_ticker": "EVENT-TICKER", "limit": 100}
)
markets = response.json()["markets"]

# Example market structure:
# {
#     "ticker": "MARKET-TICKER",
#     "title": "Will X happen?",
#     "yes_bid": 62,  (in cents)
#     "yes_ask": 64,
#     "volume": 50000,
#     "open_interest": 12000,
#     "close_time": "2026-12-31T23:59:59Z"
# }
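# Since prices are quoted in cents, implied probability = price / 100
market = markets[0]   # One parsed market dict (shape assumed as above)
yes_mid = (market["yes_bid"] + market["yes_ask"]) / 2 / 100   # (62 + 64)/2/100 = 0.63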

# Get market history (candlestick data)
response = session.get(f"{BASE_URL}/markets/{ticker}/history",
    params={"period_interval": 1440}  # Daily candles in minutes
)

Metaculus API

Metaculus focuses on calibrated community forecasts. The API is publicly accessible for reading.

BASE_URL = "https://www.metaculus.com/api2"

# Get questions
response = requests.get(f"{BASE_URL}/questions/",
    params={"status": "open", "type": "forecast", "limit": 50, "order_by": "-activity"}
)
questions = response.json()["results"]

# Example question structure:
# {
#     "id": 12345,
#     "title": "Will X happen by 2026?",
#     "created_time": "2025-01-15T...",
#     "close_time": "2026-12-31T...",
#     "resolve_time": "2027-01-15T...",
#     "community_prediction": {
#         "full": {"q1": 0.35, "median": 0.55, "q3": 0.72}
#     },
#     "number_of_predictions": 245,
#     "type": "binary"
# }

# Get detailed question with prediction history
response = requests.get(f"{BASE_URL}/questions/{question_id}/")
question = response.json()

# Get prediction history
response = requests.get(f"{BASE_URL}/questions/{question_id}/prediction-timeseries/")
timeseries = response.json()

Manifold Markets API

Manifold uses play-money markets with a generous public API.

BASE_URL = "https://api.manifold.markets/v0"

# Get markets (no auth needed for reading)
response = requests.get(f"{BASE_URL}/markets", params={"limit": 100, "sort": "liquidity"})
markets = response.json()

# Example market structure:
# {
#     "id": "abc123",
#     "question": "Will X happen?",
#     "probability": 0.62,
#     "pool": {"YES": 5000, "NO": 3000},
#     "totalLiquidity": 8000,
#     "volume": 25000,
#     "createdTime": 1700000000000,
#     "closeTime": 1735689600000,
#     "mechanism": "cpmm-1",
#     "outcomeType": "BINARY"
# }

# Get single market with full details
response = requests.get(f"{BASE_URL}/market/{market_id}")
market = response.json()

# Get market positions
response = requests.get(f"{BASE_URL}/market/{market_id}/positions",
    params={"order": "profit", "top": 20}
)

# Get bets on a market
response = requests.get(f"{BASE_URL}/bets", params={"marketId": market_id, "limit": 1000})
bets = response.json()

# Search markets
response = requests.get(f"{BASE_URL}/search-markets",
    params={"term": "election", "sort": "liquidity", "limit": 50}
)

C.7 The pmtools Module Reference

The pmtools module is a utility library built incrementally beginning in Chapter 6. It provides convenience functions for common prediction market analysis tasks. Below is a reference for the complete module as it exists by the end of the book.

Data Fetching

from pmtools import data

# Fetch current market data
markets = data.fetch_polymarket_markets(active=True, limit=100)
markets = data.fetch_kalshi_markets(status="open", limit=100)
markets = data.fetch_metaculus_questions(status="open", limit=100)
markets = data.fetch_manifold_markets(sort="liquidity", limit=100)

# Fetch price history
history = data.get_price_history(platform="polymarket", market_id="0x1234",
                                 start="2025-01-01", end="2025-12-31")
# Returns: pd.DataFrame with columns [date, price, volume]

# Unified market search across platforms
results = data.search_markets(query="election", platforms=["polymarket", "manifold"])
# Returns: pd.DataFrame with standardized columns [platform, id, question, prob, volume, close_date]

Probability and Scoring

from pmtools import scoring

# Scoring rules
scoring.brier_score(predicted=0.7, outcome=1)        # Returns 0.09
scoring.log_score(predicted=0.7, outcome=1)           # Returns -0.357
scoring.spherical_score(predicted=0.7, outcome=1)     # Returns ~0.919 = 0.7 / sqrt(0.7**2 + 0.3**2)

# Batch scoring
scores = scoring.brier_scores(predictions_array, outcomes_array)
mean_brier = scoring.mean_brier(predictions_array, outcomes_array)

# Calibration analysis
cal = scoring.calibration_report(predictions, outcomes, n_bins=10)
# Returns: pd.DataFrame with columns [bin_center, predicted_mean, observed_freq, count]

scoring.plot_calibration(predictions, outcomes, n_bins=10, title="My Model")
# Generates and displays a calibration plot

# Brier score decomposition
reliability, resolution, uncertainty = scoring.brier_decomposition(predictions, outcomes)
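# These components satisfy: mean Brier = reliability - resolution + uncertainty
# (the Murphy decomposition)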

Probability Utilities

from pmtools import prob

# Conversions
prob.to_odds(0.75)                        # Returns 3.0  (probability to odds ratio)
prob.from_odds(3.0)                       # Returns 0.75
prob.to_decimal_odds(0.75)               # Returns 1.333
prob.to_american_odds(0.75)              # Returns -300
prob.from_american_odds(-300)            # Returns 0.75
prob.implied_prob(decimal_odds=1.33, vig=0.045)  # Adjusts for vigorish
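# For reference, these conversions reduce to one-line formulas (plain Python,
# independent of the pmtools implementations):
p = 0.75
odds_ratio = p / (1 - p)                                          # 3.0
decimal_odds = 1 / p                                              # 1.333...
american = -100 * p / (1 - p) if p > 0.5 else 100 * (1 - p) / p   # -300.0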

# Bayesian updating
posterior = prob.bayes_update(prior=0.5, likelihood_ratio=2.5)  # Prior odds 1.0 * 2.5 = 2.5; 2.5/3.5 ~ 0.714

# Beta-binomial model
dist = prob.beta_model(successes=7, failures=3)  # Beta(8, 4) posterior under a uniform Beta(1, 1) prior
mean, var, ci = dist.summary()            # Mean, variance, 95% credible interval
dist.plot()                               # Plot the posterior distribution

Betting and Bankroll Management

from pmtools import kelly

# Kelly criterion
fraction = kelly.optimal_fraction(win_prob=0.6, odds=1.5)  # Full Kelly
half_kelly = kelly.fraction(win_prob=0.6, odds=1.5, kelly_mult=0.5)

# Multi-outcome Kelly
fractions = kelly.multi_kelly(
    probabilities=[0.3, 0.5, 0.2],
    odds=[3.5, 2.0, 5.0],
    kelly_mult=0.25
)

# Bankroll simulation
sim = kelly.simulate_kelly(
    win_prob=0.55, odds=1.0, n_bets=1000, n_simulations=10000,
    kelly_mult=0.5, initial_bankroll=1000
)
sim.plot_paths(n_show=50)                 # Plot sample wealth paths
sim.summary()                             # Terminal wealth statistics

# Expected growth rate
growth = kelly.expected_growth_rate(win_prob=0.6, odds=1.5, fraction=0.15)
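# For intuition: optimal_fraction presumably implements the classic binary Kelly
# formula f* = (b*p - q) / b, where b is the net odds; a plain-Python sketch:
def kelly_fraction(win_prob, odds):
    b, p, q = odds, win_prob, 1 - win_prob
    return (b * p - q) / b

kelly_fraction(0.6, 1.5)   # (1.5*0.6 - 0.4) / 1.5 = 0.333... of bankroll at full Kelly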

Backtesting

from pmtools import backtest

# Define a strategy
class SimpleEdgeStrategy(backtest.Strategy):
    def __init__(self, model, threshold=0.05, kelly_mult=0.25):
        self.model = model
        self.threshold = threshold
        self.kelly_mult = kelly_mult

    def evaluate(self, market_state):
        model_prob = self.model.predict_proba(market_state.features)
        edge = model_prob - market_state.price
        if abs(edge) > self.threshold:
            fraction = kelly.optimal_fraction(
                win_prob=model_prob, odds=market_state.odds
            ) * self.kelly_mult
            side = "YES" if edge > 0 else "NO"
            return backtest.Signal(side=side, fraction=fraction)
        return None

# Run backtest
strategy = SimpleEdgeStrategy(trained_model, threshold=0.05, kelly_mult=0.25)
results = backtest.run(
    strategy=strategy,
    data=historical_markets,
    initial_bankroll=10000,
    start_date="2025-01-01",
    end_date="2025-12-31"
)

# Analyze results
results.summary()                         # Returns, Sharpe, max drawdown, win rate
results.plot_equity_curve()              # Equity curve over time
results.plot_drawdowns()                 # Drawdown chart
results.trade_log                        # pd.DataFrame of all trades
results.monthly_returns()               # Monthly return breakdown

Visualization

from pmtools import viz

# Market-specific plots
viz.plot_price_history(market_id, platform="polymarket", start="2025-01-01")
viz.plot_orderbook_depth(market_id, platform="polymarket")
viz.plot_volume_profile(market_id, platform="polymarket")

# Comparison plots
viz.compare_platforms(question="Will X happen?", platforms=["polymarket", "kalshi", "metaculus"])
viz.compare_models(predictions_dict, outcomes, metric="brier")

# Strategy performance plots
viz.plot_equity_curve(backtest_results)
viz.plot_rolling_sharpe(backtest_results, window=30)
viz.plot_return_distribution(backtest_results)

Summary. This appendix provides a focused reference for the programming tools used throughout the book. Section C.1 covers the core Python patterns that appear in nearly every code example. Sections C.2 through C.5 document the key functions from NumPy, Pandas, Matplotlib/Seaborn, and scikit-learn. Section C.6 provides authenticated and unauthenticated API access patterns for the four major prediction market platforms. Section C.7 documents the pmtools module that readers build as they work through the book, which encapsulates the most common prediction market analysis tasks into a reusable library.