Appendix B: Python Code Reference for AI APIs

This appendix provides complete, working Python 3.10+ code examples for the major AI APIs used throughout this book. All examples are designed to run as-is after installing the required packages and configuring your API keys.


1. Environment Setup

Installing Required Packages

pip install anthropic openai google-generativeai python-dotenv tenacity

Setting Up a .env File

Create a file named .env in your project root. Never commit this file to version control.

ANTHROPIC_API_KEY=sk-ant-your-key-here
OPENAI_API_KEY=sk-your-key-here
GOOGLE_API_KEY=your-google-key-here

Loading Environment Variables

# Load API keys from a local .env file into the process environment.
import os
from dotenv import load_dotenv

load_dotenv()  # Reads key=value pairs from .env and exports them

anthropic_key = os.getenv("ANTHROPIC_API_KEY")
openai_key = os.getenv("OPENAI_API_KEY")
google_key = os.getenv("GOOGLE_API_KEY")

# Fail fast if the key the examples rely on is missing.
if not anthropic_key:
    raise ValueError("ANTHROPIC_API_KEY not found in environment variables.")

2. Anthropic Claude API

2.1 Basic Message Creation

# Minimal Claude call: one user message, then print the reply and usage stats.
import anthropic

client = anthropic.Anthropic()  # Reads ANTHROPIC_API_KEY from environment

response = client.messages.create(
    model="claude-opus-4-5",
    max_tokens=1024,
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)

print(response.content[0].text)
# Output: "The capital of France is Paris."

# Metadata returned alongside the completion
usage = response.usage
print(f"Input tokens: {usage.input_tokens}")
print(f"Output tokens: {usage.output_tokens}")
print(f"Stop reason: {response.stop_reason}")

2.2 System Prompts

# A system prompt sets standing instructions without using a user turn.
import anthropic

client = anthropic.Anthropic()

SYSTEM_PROMPT = (
    "You are a concise technical writer. "
    "Always respond in plain English suitable for a general audience. "
    "Keep answers to three sentences or fewer unless asked to elaborate."
)

message = client.messages.create(
    model="claude-opus-4-5",
    max_tokens=1024,
    system=SYSTEM_PROMPT,
    messages=[{"role": "user", "content": "What is a REST API?"}],
)

print(message.content[0].text)

2.3 Multi-Turn Conversations

import anthropic

client = anthropic.Anthropic()

def chat(conversation_history: list[dict], user_input: str) -> tuple[str, list[dict]]:
    """Append *user_input* to the history, query Claude, and record the reply.

    Returns (assistant_reply, updated_history). Note that the history list
    is mutated in place as well as returned.
    """
    conversation_history.append({"role": "user", "content": user_input})

    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=2048,
        system="You are a helpful assistant.",
        messages=conversation_history,
    )

    reply_text = response.content[0].text
    conversation_history.append({"role": "assistant", "content": reply_text})
    return reply_text, conversation_history


# Example usage: each call threads the growing history back into the model.
convo: list[dict] = []

for question in (
    "I am planning a trip to Japan in April.",
    "What should I pack for the weather?",
    "What is the best region to visit for cherry blossoms?",
):
    reply, convo = chat(convo, question)
    print(f"Assistant: {reply}\n")

# Inspect the full conversation
for turn in convo:
    print(f"[{turn['role'].upper()}]: {turn['content'][:80]}...")

2.4 Streaming

import anthropic

client = anthropic.Anthropic()

print("Streaming response:")
print("-" * 40)

with client.messages.stream(
    model="claude-opus-4-5",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a short poem about morning coffee."}
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

    # FIX: get_final_message() must run while the stream is still open.
    # The original called it after the `with` block, i.e. after __exit__
    # had already closed the underlying HTTP response.
    final_message = stream.get_final_message()

print("\n" + "-" * 40)

# Usage totals for the whole streamed completion
print(f"\nTotal tokens used: {final_message.usage.input_tokens + final_message.usage.output_tokens}")

2.5 Token Counting

import anthropic

client = anthropic.Anthropic()

# Count tokens before sending (useful for staying within context limits)
messages = [
    {"role": "user", "content": "Explain the history of the Roman Empire in detail."}
]

token_count = client.messages.count_tokens(
    model="claude-opus-4-5",
    system="You are a knowledgeable historian.",
    messages=messages
)

print(f"This request will use approximately {token_count.input_tokens} input tokens.")

# Estimated cost (as of early 2025 — verify current pricing).
# FIX: the request above uses Claude Opus, so price it at the Opus input
# rate ($15.00 per million tokens, matching the PRICING table in section
# 5.2) — the original mistakenly applied the Sonnet rate ($3.00).
OPUS_INPUT_PER_MILLION = 15.00
cost_estimate = (token_count.input_tokens / 1_000_000) * OPUS_INPUT_PER_MILLION
print(f"Estimated input cost: ${cost_estimate:.6f}")

2.6 Model Selection

import anthropic

client = anthropic.Anthropic()

# Available Claude models (verify current availability at docs.anthropic.com)
MODELS = {
    "haiku": "claude-haiku-4-5",      # Fastest, lowest cost — good for classification, simple Q&A
    "sonnet": "claude-sonnet-4-5",    # Balanced speed and capability — good for most tasks
    "opus": "claude-opus-4-5",        # Highest capability — complex reasoning, nuanced writing
}

def ask_claude(prompt: str, model_tier: str = "sonnet", max_tokens: int = 1024) -> str:
    """Send *prompt* to the Claude tier named by *model_tier*.

    Unknown tier names silently fall back to the balanced "sonnet" model.
    """
    chosen_model = MODELS.get(model_tier, MODELS["sonnet"])
    reply = client.messages.create(
        model=chosen_model,
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": prompt}],
    )
    return reply.content[0].text


# Route tasks to appropriate models: cheap tier for trivia, top tier for analysis.
simple_task = ask_claude("Is 'affect' or 'effect' correct here: 'The rain ___ my mood'?", model_tier="haiku")
print(f"Haiku response: {simple_task}\n")

complex_task = ask_claude(
    "Analyze the strategic implications of quantum computing for cybersecurity over the next decade.",
    model_tier="opus",
    max_tokens=2048
)
print(f"Opus response (first 200 chars): {complex_task[:200]}...")

3. OpenAI API

3.1 Basic Chat Completion

from openai import OpenAI

client = OpenAI()  # Reads OPENAI_API_KEY from environment

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is the tallest mountain in the world?"}],
)

print(completion.choices[0].message.content)

# Token accounting lives on the `usage` attribute of the completion.
usage = completion.usage
print(f"\nTokens used — Input: {usage.prompt_tokens}, Output: {usage.completion_tokens}")

3.2 System Messages

from openai import OpenAI

client = OpenAI()

# OpenAI uses a message with role "system" for standing instructions.
system_message = {
    "role": "system",
    "content": (
        "You are an expert data analyst. "
        "When asked to interpret data, always comment on: "
        "(1) the trend, (2) any outliers, and (3) what additional data would strengthen the analysis."
    ),
}
user_message = {
    "role": "user",
    "content": "Monthly sales: Jan 120, Feb 115, Mar 130, Apr 195, May 128, Jun 122",
}

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[system_message, user_message],
)

print(response.choices[0].message.content)

3.3 Multi-Turn Conversations

from openai import OpenAI

client = OpenAI()

# The system message rides at the front of the history on every turn.
conversation = [
    {
        "role": "system",
        "content": "You are a helpful cooking assistant. Keep suggestions practical and beginner-friendly."
    }
]

def chat_openai(conversation: list[dict], user_message: str) -> tuple[str, list[dict]]:
    """Append *user_message*, query the model, and record the assistant turn.

    Returns (assistant_reply, updated_conversation); the list passed in is
    also mutated in place.
    """
    conversation.append({"role": "user", "content": user_message})

    result = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation
    )

    assistant_message = result.choices[0].message.content
    conversation.append({"role": "assistant", "content": assistant_message})
    return assistant_message, conversation


for question in (
    "I have chicken, garlic, and spinach. What can I make?",
    "How long does that take to cook?",
):
    reply, conversation = chat_openai(conversation, question)
    print(f"Assistant: {reply}\n")

3.4 Streaming

from openai import OpenAI

client = OpenAI()

print("Streaming OpenAI response:")
print("-" * 40)

# With stream=True the API yields chunks; each chunk's delta may carry text.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Explain machine learning in simple terms."}],
    stream=True
)

collected_text = []
for chunk in stream:
    piece = chunk.choices[0].delta.content
    if piece is not None:
        print(piece, end="", flush=True)
        collected_text.append(piece)

print("\n" + "-" * 40)
full_response = "".join(collected_text)
print(f"Total characters: {len(full_response)}")

3.5 Function Calling Overview

import json
from openai import OpenAI

client = OpenAI()

# Define tools (functions) the model can choose to call.
# Each entry is a JSON-Schema description of one function; the model never
# executes anything itself — it only returns the name and arguments it
# wants the caller to run.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city name, e.g., 'London'"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                # Only "city" is mandatory; "unit" falls back to the Python default.
                "required": ["city"]
            }
        }
    }
]

def get_weather(city: str, unit: str = "celsius") -> dict:
    """Mock weather lookup — swap in a real weather service for production."""
    report = {"city": city, "temperature": 18, "unit": unit, "condition": "partly cloudy"}
    return report


first_response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "What is the weather like in Tokyo?"}],
    tools=tools,
    tool_choice="auto"
)

assistant_msg = first_response.choices[0].message

if not assistant_msg.tool_calls:
    # The model answered directly without requesting a tool.
    print(assistant_msg.content)
else:
    call = assistant_msg.tool_calls[0]
    function_name = call.function.name
    function_args = json.loads(call.function.arguments)

    print(f"Model wants to call: {function_name}({function_args})")

    # Run the requested function locally...
    weather = get_weather(**function_args)

    # ...then hand the result back so the model can phrase a natural reply.
    followup_response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": "What is the weather like in Tokyo?"},
            assistant_msg,
            {
                "role": "tool",
                "content": json.dumps(weather),
                "tool_call_id": call.id
            }
        ]
    )
    print(f"\nFinal response: {followup_response.choices[0].message.content}")

4. Google Generative AI — Basic Example

import google.generativeai as genai
import os

genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# One-shot text generation
model = genai.GenerativeModel("gemini-1.5-pro")
print(model.generate_content("Summarize the key principles of good scientific writing.").text)

# Multi-turn chat with Gemini — the chat object keeps its own history.
chat = model.start_chat(history=[])

for prompt in (
    "I am learning Python. Where should I start?",
    "What resources do you recommend for someone who learns best by doing?",
):
    reply = chat.send_message(prompt)
    print(f"Gemini: {reply.text}\n")

5. Utility Functions

5.1 Retry with Exponential Backoff

import time
import random
import anthropic
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type


# FIX: the original retried on anthropic.APIStatusError, the base class for
# *all* HTTP status errors — including 400/401/404 client errors that can
# never succeed on retry. Retry only transient failures: rate limits,
# server-side 5xx errors, and connection problems.
@retry(
    retry=retry_if_exception_type(
        (anthropic.RateLimitError, anthropic.InternalServerError, anthropic.APIConnectionError)
    ),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5)
)
def call_claude_with_retry(client: anthropic.Anthropic, prompt: str, model: str = "claude-sonnet-4-5") -> str:
    """Call Claude with automatic retry on transient errors.

    Waits exponentially between attempts (4–60 seconds) and gives up after
    five tries, re-raising the final error.
    """
    response = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.content[0].text


# Manual implementation without tenacity
def call_with_backoff(
    func,
    max_retries: int = 5,
    base_delay: float = 1.0,
    retry_exceptions: tuple = (Exception,),
    max_jitter: float = 1.0,
):
    """
    Generic retry wrapper with exponential backoff and jitter.

    Calls ``func()`` up to *max_retries* times. Between failed attempts it
    sleeps ``base_delay * 2**attempt`` seconds plus up to *max_jitter*
    seconds of random jitter (jitter de-synchronizes many clients retrying
    at once). The last failure is always re-raised.

    Generalized (backward-compatibly) from the original: *retry_exceptions*
    lets callers retry only specific exception types instead of everything,
    and *max_jitter* makes the jitter configurable; the defaults reproduce
    the original behavior exactly.
    """
    for attempt in range(max_retries):
        try:
            return func()
        except retry_exceptions as e:
            if attempt == max_retries - 1:
                raise  # out of attempts — surface the last error

            delay = base_delay * (2 ** attempt) + random.uniform(0, max_jitter)
            print(f"Attempt {attempt + 1} failed: {e}. Retrying in {delay:.1f}s...")
            time.sleep(delay)

5.2 Token Cost Estimator

from dataclasses import dataclass


@dataclass
class ModelPricing:
    """Per-million-token USD rates for a single model."""
    input_per_million: float   # USD per 1M input tokens
    output_per_million: float  # USD per 1M output tokens


# Pricing as of early 2025 — verify at provider websites before use
PRICING = {
    "claude-haiku-4-5":    ModelPricing(input_per_million=0.25,  output_per_million=1.25),
    "claude-sonnet-4-5":   ModelPricing(input_per_million=3.00,  output_per_million=15.00),
    "claude-opus-4-5":     ModelPricing(input_per_million=15.00, output_per_million=75.00),
    "gpt-4o":              ModelPricing(input_per_million=5.00,  output_per_million=15.00),
    "gpt-4o-mini":         ModelPricing(input_per_million=0.15,  output_per_million=0.60),
}


def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> dict:
    """
    Estimate the USD cost of one API call from its token counts.

    Returns a dict holding the model, both token counts, and input/output/
    total costs rounded to six decimal places. Raises ValueError for models
    missing from PRICING.
    """
    if model not in PRICING:
        raise ValueError(f"Unknown model: {model}. Add it to the PRICING dict.")

    rates = PRICING[model]
    cost_in = (input_tokens / 1_000_000) * rates.input_per_million
    cost_out = (output_tokens / 1_000_000) * rates.output_per_million

    return {
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "input_cost_usd": round(cost_in, 6),
        "output_cost_usd": round(cost_out, 6),
        "total_cost_usd": round(cost_in + cost_out, 6),
    }


# Example
cost = estimate_cost("claude-sonnet-4-5", input_tokens=1500, output_tokens=400)
print(f"Estimated cost: ${cost['total_cost_usd']:.4f}")

5.3 Batch Processor

import time
import anthropic
from typing import Callable


def process_batch(
    prompts: list[str],
    process_fn: Callable[[str], str],
    delay_between_requests: float = 0.5,
    verbose: bool = True
) -> list[dict]:
    """
    Run *process_fn* over *prompts* one at a time, pausing between calls so
    sequential batches stay under provider rate limits.

    Each result dict records a 1-based index, the prompt, the response
    (None on failure), a "success"/"error" status, and the error text
    (None on success).
    """
    outcomes: list[dict] = []
    total = len(prompts)

    for position, prompt in enumerate(prompts, 1):
        if verbose:
            print(f"Processing {position}/{total}...", end=" ")

        record = {
            "index": position,
            "prompt": prompt,
            "response": None,
            "status": "error",
            "error": None,
        }
        try:
            record["response"] = process_fn(prompt)
        except Exception as exc:
            record["error"] = str(exc)
            if verbose:
                print(f"ERROR: {exc}")
        else:
            record["status"] = "success"
            if verbose:
                print("OK")
        outcomes.append(record)

        # No need to sleep after the final prompt.
        if position < total:
            time.sleep(delay_between_requests)

    if verbose:
        succeeded = sum(1 for r in outcomes if r["status"] == "success")
        print(f"\nCompleted: {succeeded}/{total} successful")

    return outcomes


# Example usage
client = anthropic.Anthropic()

def classify_sentiment(text: str) -> str:
    """Label *text* as POSITIVE / NEGATIVE / NEUTRAL using the cheapest model."""
    result = client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=10,
        messages=[{
            "role": "user",
            "content": f"Classify the sentiment as POSITIVE, NEGATIVE, or NEUTRAL. Reply with one word only.\n\nText: {text}"
        }]
    )
    return result.content[0].text.strip()


# Sample inputs for the batch processor defined above
reviews = [
    "This product exceeded all my expectations!",
    "Terrible quality. Would not recommend.",
    "It arrived on time and works as described.",
    "I have mixed feelings — some parts are great, others disappointing.",
]

# Uncomment to run:
# results = process_batch(reviews, classify_sentiment)
# for r in results:
#     print(f"'{r['prompt'][:40]}...' → {r['response']}")

5.4 Simple Conversation Manager Class

import anthropic
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class Message:
    """One conversation turn, stamped with its creation time."""
    role: str  # "user" or "assistant"
    content: str  # the turn's text
    timestamp: datetime = field(default_factory=datetime.now)  # local wall-clock time when the turn was recorded


class ConversationManager:
    """
    Wraps a multi-turn Claude conversation: keeps the message history,
    trims it to a maximum window per request, and accumulates token usage.
    """

    def __init__(
        self,
        system_prompt: str = "You are a helpful assistant.",
        model: str = "claude-sonnet-4-5",
        max_tokens: int = 2048,
        max_history_messages: int = 20
    ):
        self.client = anthropic.Anthropic()
        self.system_prompt = system_prompt
        self.model = model
        self.max_tokens = max_tokens
        self.max_history_messages = max_history_messages  # cap on messages sent per request
        self.history: list[Message] = []
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def send(self, user_input: str) -> str:
        """Record *user_input*, query Claude, then record and return the reply."""
        self.history.append(Message(role="user", content=user_input))

        # Only the most recent messages go out, keeping requests bounded.
        window = self.history[-self.max_history_messages:]
        payload = [{"role": turn.role, "content": turn.content} for turn in window]

        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system=self.system_prompt,
            messages=payload
        )

        assistant_text = response.content[0].text
        self.history.append(Message(role="assistant", content=assistant_text))

        # Accumulate usage for get_stats().
        self.total_input_tokens += response.usage.input_tokens
        self.total_output_tokens += response.usage.output_tokens

        return assistant_text

    def get_stats(self) -> dict:
        """Summarize turn counts and token usage so far."""
        user_turns = sum(1 for turn in self.history if turn.role == "user")
        return {
            "total_turns": user_turns,
            "total_messages": len(self.history),
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_tokens": self.total_input_tokens + self.total_output_tokens,
        }

    def reset(self):
        """Drop the history and usage counters; keep all settings."""
        self.history = []
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def print_history(self):
        """Dump the conversation as alternating You/Claude lines."""
        for turn in self.history:
            speaker = "You" if turn.role == "user" else "Claude"
            print(f"[{speaker}]: {turn.content}\n")

# Example usage — guard keeps this from running when the module is imported.
if __name__ == "__main__":
    # The manager builds its own anthropic.Anthropic() client in __init__,
    # so ANTHROPIC_API_KEY must be set before instantiating it.
    convo = ConversationManager(
        system_prompt="You are a concise Python tutor. Keep all code examples under 20 lines.",
        model="claude-sonnet-4-5"
    )

    # Uncomment to run an actual conversation:
    # reply = convo.send("How do I read a CSV file in Python?")
    # print(f"Claude: {reply}\n")

    # reply = convo.send("How do I filter rows where a column value is greater than 100?")
    # print(f"Claude: {reply}\n")

    # print(convo.get_stats())

6. Error Handling Reference

import anthropic
from openai import OpenAI, APIError, RateLimitError, APIConnectionError


def robust_claude_call(prompt: str) -> str | None:
    """
    Call Claude, translating each Anthropic exception type into a friendly
    console message and returning None on any failure.

    Ordering note: the subclass exceptions (authentication, rate limit,
    bad request) are caught before the APIStatusError base class.
    """
    client = anthropic.Anthropic()

    try:
        result = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
    except anthropic.AuthenticationError:
        print("ERROR: Invalid API key. Check your ANTHROPIC_API_KEY environment variable.")
        return None
    except anthropic.RateLimitError as e:
        print(f"ERROR: Rate limit exceeded. Wait before retrying. Details: {e}")
        return None
    except anthropic.BadRequestError as e:
        print(f"ERROR: Bad request — likely an issue with prompt structure. Details: {e}")
        return None
    except anthropic.APIConnectionError as e:
        print(f"ERROR: Could not connect to Anthropic API. Check your internet connection. Details: {e}")
        return None
    except anthropic.APIStatusError as e:
        print(f"ERROR: API returned status {e.status_code}. Message: {e.message}")
        return None
    else:
        return result.content[0].text


def robust_openai_call(prompt: str) -> str | None:
    """
    Call the OpenAI API with layered error handling, returning None on failure.

    Ordering note: RateLimitError and APIConnectionError are caught before
    the generic APIError base class.
    """
    client = OpenAI()

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    except RateLimitError:
        print("ERROR: OpenAI rate limit exceeded.")
        return None

    except APIConnectionError as e:
        print(f"ERROR: Connection failed. Details: {e}")
        return None

    except APIError as e:
        # FIX: status_code only exists on APIStatusError subclasses, not on
        # the APIError base — reading it unconditionally could raise
        # AttributeError inside the handler. Fall back gracefully.
        status = getattr(e, "status_code", "unknown")
        detail = getattr(e, "message", str(e))
        print(f"ERROR: OpenAI API error {status}: {detail}")
        return None

7. Best Practices for Production Code

"""
Production best practices checklist implemented as code patterns.
"""

import os
import logging
import hashlib
import json
import time
from functools import wraps
from typing import Any

import anthropic

# 1. Configure structured logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s"
)
logger = logging.getLogger(__name__)


# 2. Never hardcode API keys — always use environment variables
def get_client() -> anthropic.Anthropic:
    """Build an Anthropic client, failing fast if the key is not configured."""
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise EnvironmentError("ANTHROPIC_API_KEY must be set before running this application.")
    return anthropic.Anthropic(api_key=api_key)


# 3. Log all API calls with metadata for debugging and cost tracking
def logged_api_call(func):
    """Decorator: log the duration and success/failure of each wrapped call."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        started = time.time()
        try:
            outcome = func(*args, **kwargs)
        except Exception as e:
            elapsed = time.time() - started
            logger.error("api_call_failed", extra={"function": func.__name__, "error": str(e), "duration_s": round(elapsed, 3)})
            raise
        else:
            elapsed = time.time() - started
            logger.info("api_call_success", extra={"function": func.__name__, "duration_s": round(elapsed, 3)})
            return outcome
    return wrapper


# 4. Simple cache to avoid re-calling the API for identical prompts
class SimplePromptCache:
    def __init__(self):
        self._cache: dict[str, str] = {}

    def _key(self, prompt: str, model: str) -> str:
        content = f"{model}::{prompt}"
        return hashlib.sha256(content.encode()).hexdigest()

    def get(self, prompt: str, model: str) -> str | None:
        return self._cache.get(self._key(prompt, model))

    def set(self, prompt: str, model: str, response: str) -> None:
        self._cache[self._key(prompt, model)] = response

    def size(self) -> int:
        return len(self._cache)


cache = SimplePromptCache()


# 5. Validate and sanitize inputs before sending to the API
def sanitize_prompt(prompt: str, max_length: int = 10_000) -> str:
    """
    Validate a prompt: it must be a non-empty string. Prompts longer than
    *max_length* characters are truncated (with a logged warning); the
    result is returned stripped of surrounding whitespace.
    """
    if not isinstance(prompt, str):
        raise TypeError(f"Prompt must be a string, got {type(prompt)}")
    if not prompt.strip():
        raise ValueError("Prompt cannot be empty.")
    if len(prompt) <= max_length:
        return prompt.strip()
    logger.warning(f"Prompt truncated from {len(prompt)} to {max_length} characters.")
    return prompt[:max_length].strip()


# 6. Use environment-specific model selection
def get_model_for_environment() -> str:
    """Return the production model only when APP_ENV says so; otherwise the cheap tier."""
    if os.environ.get("APP_ENV", "development") == "production":
        return "claude-sonnet-4-5"
    return "claude-haiku-4-5"  # Cheaper for dev/testing


# 7. Set reasonable timeouts
@logged_api_call
def production_api_call(prompt: str) -> str:
    """
    Production-shaped call path: validated input, environment-based model
    choice, cache lookup, and a hard per-request timeout.
    """
    client = get_client()
    cleaned = sanitize_prompt(prompt)
    model = get_model_for_environment()

    # Serve repeats straight from the in-memory cache.
    cached = cache.get(cleaned, model)
    if cached:
        logger.info("Cache hit — returning cached response.")
        return cached

    response = client.messages.create(
        model=model,
        max_tokens=2048,
        messages=[{"role": "user", "content": cleaned}],
        timeout=30.0  # Seconds — prevents hanging indefinitely
    )

    answer = response.content[0].text
    cache.set(cleaned, model, answer)

    logger.info(f"Tokens used: {response.usage.input_tokens} in, {response.usage.output_tokens} out")
    return answer

For the latest model IDs, pricing, and API capabilities, always refer to the official documentation: docs.anthropic.com, platform.openai.com/docs, and ai.google.dev. Model names and pricing change frequently.