Appendix B: Python Code Reference for AI APIs
This appendix provides complete, working Python 3.10+ code examples for the major AI APIs used throughout this book. All examples are designed to run as-is after installing the required packages and configuring your API keys.
1. Environment Setup
Installing Required Packages
pip install anthropic openai google-generativeai python-dotenv tenacity
Setting Up a .env File
Create a file named .env in your project root. Never commit this file to version control.
ANTHROPIC_API_KEY=sk-ant-your-key-here
OPENAI_API_KEY=sk-your-key-here
GOOGLE_API_KEY=your-google-key-here
Loading Environment Variables
import os

from dotenv import load_dotenv

# Pull the variables defined in .env into the process environment.
load_dotenv()

# Read each provider key; os.environ.get returns None for absent keys.
anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
openai_key = os.environ.get("OPENAI_API_KEY")
google_key = os.environ.get("GOOGLE_API_KEY")

# Fail fast: the Anthropic examples in this appendix cannot run without this key.
if anthropic_key is None or anthropic_key == "":
    raise ValueError("ANTHROPIC_API_KEY not found in environment variables.")
2. Anthropic Claude API
2.1 Basic Message Creation
import anthropic

# The client picks up ANTHROPIC_API_KEY from the environment automatically.
client = anthropic.Anthropic()

# A minimal single-turn request: one user message, plain-text reply.
message = client.messages.create(
    model="claude-opus-4-5",
    max_tokens=1024,
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)

# content is a list of blocks; index 0 holds the text block for this request.
print(message.content[0].text)
# Output: "The capital of France is Paris."

# Usage metadata travels back on every response.
print(f"Input tokens: {message.usage.input_tokens}")
print(f"Output tokens: {message.usage.output_tokens}")
print(f"Stop reason: {message.stop_reason}")
2.2 System Prompts
import anthropic

client = anthropic.Anthropic()

# The system prompt is a top-level parameter, not an entry in the messages list.
SYSTEM_PROMPT = (
    "You are a concise technical writer. "
    "Always respond in plain English suitable for a general audience. "
    "Keep answers to three sentences or fewer unless asked to elaborate."
)

message = client.messages.create(
    model="claude-opus-4-5",
    max_tokens=1024,
    system=SYSTEM_PROMPT,
    messages=[{"role": "user", "content": "What is a REST API?"}],
)
print(message.content[0].text)
2.3 Multi-Turn Conversations
import anthropic

client = anthropic.Anthropic()


def chat(conversation_history: list[dict], user_input: str) -> tuple[str, list[dict]]:
    """
    Send a message in a multi-turn conversation.
    Returns the assistant's reply and the updated conversation history.

    The history list is mutated in place: the user turn is appended before
    the API call and the assistant turn after it, so the model always sees
    the complete alternating transcript.
    """
    conversation_history.append({"role": "user", "content": user_input})
    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=2048,
        system="You are a helpful assistant.",
        messages=conversation_history,
    )
    assistant_reply = response.content[0].text
    conversation_history.append({"role": "assistant", "content": assistant_reply})
    return assistant_reply, conversation_history


# Example usage: each call sees the full history, so follow-ups resolve context.
history = []
for question in (
    "I am planning a trip to Japan in April.",
    "What should I pack for the weather?",
    "What is the best region to visit for cherry blossoms?",
):
    reply, history = chat(history, question)
    print(f"Assistant: {reply}\n")

# Inspect the full conversation
for turn in history:
    print(f"[{turn['role'].upper()}]: {turn['content'][:80]}...")
2.4 Streaming
import anthropic

client = anthropic.Anthropic()

print("Streaming response:")
print("-" * 40)

with client.messages.stream(
    model="claude-opus-4-5",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a short poem about morning coffee."}
    ]
) as stream:
    # text_stream yields incremental text deltas as they arrive.
    for text in stream.text_stream:
        print(text, end="", flush=True)
    # FIX: fetch the accumulated final message while the stream context is
    # still open. The original called get_final_message() after the `with`
    # block, where the underlying HTTP response has already been closed.
    final_message = stream.get_final_message()

print("\n" + "-" * 40)
print(f"\nTotal tokens used: {final_message.usage.input_tokens + final_message.usage.output_tokens}")
2.5 Token Counting
import anthropic

client = anthropic.Anthropic()

# Count tokens before sending (useful for staying within context limits)
messages = [
    {"role": "user", "content": "Explain the history of the Roman Empire in detail."}
]

token_count = client.messages.count_tokens(
    model="claude-opus-4-5",
    system="You are a knowledgeable historian.",
    messages=messages
)
print(f"This request will use approximately {token_count.input_tokens} input tokens.")

# Estimated cost (as of early 2025 — verify current pricing).
# FIX: the request above targets Claude Opus, so use the Opus input rate
# ($15.00 per million input tokens). The original applied the Sonnet rate
# ($3.00/M) to an Opus request, underestimating cost by 5x.
OPUS_INPUT_COST_PER_MILLION = 15.00
cost_estimate = (token_count.input_tokens / 1_000_000) * OPUS_INPUT_COST_PER_MILLION
print(f"Estimated input cost: ${cost_estimate:.6f}")
2.6 Model Selection
import anthropic

client = anthropic.Anthropic()

# Available Claude models (verify current availability at docs.anthropic.com)
MODELS = {
    "haiku": "claude-haiku-4-5",    # Fastest, lowest cost — good for classification, simple Q&A
    "sonnet": "claude-sonnet-4-5",  # Balanced speed and capability — good for most tasks
    "opus": "claude-opus-4-5",      # Highest capability — complex reasoning, nuanced writing
}


def ask_claude(prompt: str, model_tier: str = "sonnet", max_tokens: int = 1024) -> str:
    """Query Claude with a specified model tier.

    Unrecognized tiers silently fall back to the balanced "sonnet" tier.
    """
    resolved_model = MODELS.get(model_tier, MODELS["sonnet"])
    result = client.messages.create(
        model=resolved_model,
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": prompt}],
    )
    return result.content[0].text


# Route tasks to appropriate models
simple_task = ask_claude("Is 'affect' or 'effect' correct here: 'The rain ___ my mood'?", model_tier="haiku")
print(f"Haiku response: {simple_task}\n")

complex_task = ask_claude(
    "Analyze the strategic implications of quantum computing for cybersecurity over the next decade.",
    model_tier="opus",
    max_tokens=2048
)
print(f"Opus response (first 200 chars): {complex_task[:200]}...")
3. OpenAI API
3.1 Basic Chat Completion
from openai import OpenAI

client = OpenAI()  # Reads OPENAI_API_KEY from environment

# Single-turn completion: the messages list holds one user entry.
question = {"role": "user", "content": "What is the tallest mountain in the world?"}
response = client.chat.completions.create(model="gpt-4o", messages=[question])

# choices[0] is the only completion unless n > 1 was requested.
print(response.choices[0].message.content)
print(f"\nTokens used — Input: {response.usage.prompt_tokens}, Output: {response.usage.completion_tokens}")
3.2 System Messages
from openai import OpenAI

client = OpenAI()

# The system message pins the analyst persona; the user message carries the data.
system_message = {
    "role": "system",
    "content": (
        "You are an expert data analyst. "
        "When asked to interpret data, always comment on: "
        "(1) the trend, (2) any outliers, and (3) what additional data would strengthen the analysis."
    ),
}
user_message = {
    "role": "user",
    "content": "Monthly sales: Jan 120, Feb 115, Mar 130, Apr 195, May 128, Jun 122",
}

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[system_message, user_message],
)
print(response.choices[0].message.content)
3.3 Multi-Turn Conversations
from openai import OpenAI

client = OpenAI()

# The system turn stays at index 0 for the lifetime of the conversation.
conversation = [
    {
        "role": "system",
        "content": "You are a helpful cooking assistant. Keep suggestions practical and beginner-friendly."
    }
]


def chat_openai(conversation: list[dict], user_message: str) -> tuple[str, list[dict]]:
    """Add a user message, get a response, and return the updated conversation."""
    conversation.append({"role": "user", "content": user_message})
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    answer = completion.choices[0].message.content
    conversation.append({"role": "assistant", "content": answer})
    return answer, conversation


for followup in (
    "I have chicken, garlic, and spinach. What can I make?",
    "How long does that take to cook?",
):
    reply, conversation = chat_openai(conversation, followup)
    print(f"Assistant: {reply}\n")
3.4 Streaming
from openai import OpenAI

client = OpenAI()

print("Streaming OpenAI response:")
print("-" * 40)

# stream=True switches the response into an iterator of incremental chunks.
stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Explain machine learning in simple terms."}],
    stream=True
)

pieces: list[str] = []
for chunk in stream:
    fragment = chunk.choices[0].delta.content
    # The final chunk carries no content delta, only a finish reason.
    if fragment is not None:
        print(fragment, end="", flush=True)
        pieces.append(fragment)

print("\n" + "-" * 40)
full_response = "".join(pieces)
print(f"Total characters: {len(full_response)}")
3.5 Function Calling Overview
import json
from openai import OpenAI

client = OpenAI()

# Define tools (functions) the model can choose to call
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city name, e.g., 'London'"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "Temperature unit"
                    }
                },
                "required": ["city"]
            }
        }
    }
]


def get_weather(city: str, unit: str = "celsius") -> dict:
    """Mock weather function — replace with real API call in production."""
    return {"city": city, "temperature": 18, "unit": unit, "condition": "partly cloudy"}


messages = [{"role": "user", "content": "What is the weather like in Tokyo?"}]
response = client.chat.completions.create(
    model="gpt-4o",
    messages=messages,
    tools=tools,
    tool_choice="auto"
)
message = response.choices[0].message

# Check if the model decided to call a function
if message.tool_calls:
    # The assistant turn (containing the tool_calls) must precede the results.
    messages.append(message)
    # FIX: the API requires one "tool" result message per tool_call id in the
    # assistant turn. The original executed only the first call, so the
    # follow-up request fails with a 400 error whenever the model issues
    # multiple tool calls at once.
    for tool_call in message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)
        print(f"Model wants to call: {function_name}({function_args})")
        # Execute the function
        result = get_weather(**function_args)
        messages.append({
            "role": "tool",
            "content": json.dumps(result),
            "tool_call_id": tool_call.id
        })
    # Send the results back to the model for a natural language response
    followup_response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages
    )
    print(f"\nFinal response: {followup_response.choices[0].message.content}")
else:
    print(message.content)
4. Google Generative AI — Basic Example
import os

import google.generativeai as genai

# Gemini uses explicit configuration rather than reading the env var itself.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Basic text generation
model = genai.GenerativeModel("gemini-1.5-pro")
response = model.generate_content("Summarize the key principles of good scientific writing.")
print(response.text)

# Multi-turn chat: the chat object accumulates history across send_message calls.
chat = model.start_chat(history=[])
for prompt_text in (
    "I am learning Python. Where should I start?",
    "What resources do you recommend for someone who learns best by doing?",
):
    reply = chat.send_message(prompt_text)
    print(f"Gemini: {reply.text}\n")
5. Utility Functions
5.1 Retry with Exponential Backoff
import time
import random
import anthropic
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type


# FIX: retry only transient failures. The original retried on
# anthropic.APIStatusError, the base class for *all* non-2xx responses —
# including 400 BadRequestError and 401 AuthenticationError, which can
# never succeed on retry. Rate limits (429), server errors (5xx), and
# network failures are the genuinely transient cases.
@retry(
    retry=retry_if_exception_type((
        anthropic.RateLimitError,
        anthropic.InternalServerError,
        anthropic.APIConnectionError,
    )),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    stop=stop_after_attempt(5)
)
def call_claude_with_retry(client: anthropic.Anthropic, prompt: str, model: str = "claude-sonnet-4-5") -> str:
    """Call Claude API with automatic retry on rate limit, server, or connection errors."""
    response = client.messages.create(
        model=model,
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.content[0].text
# Manual implementation without tenacity
def call_with_backoff(
    func,
    max_retries: int = 5,
    base_delay: float = 1.0,
    retryable_exceptions: tuple = (Exception,),
):
    """
    Generic retry wrapper with exponential backoff and jitter.
    Works with any callable that may raise transient exceptions.

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Total number of attempts before giving up.
        base_delay: Delay before the first retry, in seconds; doubles per attempt.
        retryable_exceptions: Exception types that trigger a retry. Defaults to
            (Exception,) for backward compatibility, but callers should narrow
            this to genuinely transient errors (rate limits, timeouts) so
            permanent failures surface immediately.

    Returns:
        Whatever func() returns on its first successful attempt.

    Raises:
        The last exception from func() once all attempts are exhausted, or
        immediately for exception types not listed in retryable_exceptions.
    """
    for attempt in range(max_retries):
        try:
            return func()
        except retryable_exceptions as e:
            if attempt == max_retries - 1:
                raise  # Out of attempts — surface the final failure.
            # Exponential backoff with jitter proportional to the current step,
            # so short base delays stay short and long ones still desynchronize
            # concurrent clients. (The original added a fixed 0-1s jitter that
            # dwarfed small base delays and was negligible for large ones.)
            delay = base_delay * (2 ** attempt) * (1 + random.random())
            print(f"Attempt {attempt + 1} failed: {e}. Retrying in {delay:.1f}s...")
            time.sleep(delay)
5.2 Token Cost Estimator
from dataclasses import dataclass


@dataclass
class ModelPricing:
    """Per-model USD rates, expressed per one million tokens."""
    input_per_million: float   # USD per 1M input tokens
    output_per_million: float  # USD per 1M output tokens


# Pricing as of early 2025 — verify at provider websites before use
PRICING = {
    "claude-haiku-4-5": ModelPricing(input_per_million=0.25, output_per_million=1.25),
    "claude-sonnet-4-5": ModelPricing(input_per_million=3.00, output_per_million=15.00),
    "claude-opus-4-5": ModelPricing(input_per_million=15.00, output_per_million=75.00),
    "gpt-4o": ModelPricing(input_per_million=5.00, output_per_million=15.00),
    "gpt-4o-mini": ModelPricing(input_per_million=0.15, output_per_million=0.60),
}


def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> dict:
    """
    Estimate the cost of an API call given token counts.
    Returns a dict with input cost, output cost, and total cost in USD.

    Raises ValueError for models missing from the PRICING table.
    """
    try:
        rates = PRICING[model]
    except KeyError:
        raise ValueError(f"Unknown model: {model}. Add it to the PRICING dict.") from None
    cost_in = (input_tokens / 1_000_000) * rates.input_per_million
    cost_out = (output_tokens / 1_000_000) * rates.output_per_million
    return {
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "input_cost_usd": round(cost_in, 6),
        "output_cost_usd": round(cost_out, 6),
        "total_cost_usd": round(cost_in + cost_out, 6),
    }


# Example
cost = estimate_cost("claude-sonnet-4-5", input_tokens=1500, output_tokens=400)
print(f"Estimated cost: ${cost['total_cost_usd']:.4f}")
5.3 Batch Processor
import time
import anthropic
from typing import Callable
def process_batch(
    prompts: list[str],
    process_fn: Callable[[str], str],
    delay_between_requests: float = 0.5,
    verbose: bool = True
) -> list[dict]:
    """
    Process a list of prompts sequentially with rate-limit-friendly delays.
    Returns a list of result dicts with prompt, response, and status.

    Each result dict carries: index (1-based), prompt, response (None on
    failure), status ("success" or "error"), and error (None on success).
    A failing prompt never aborts the batch — the exception text is recorded
    and processing continues with the next prompt.
    """
    outcomes: list[dict] = []
    total = len(prompts)
    for position, current_prompt in enumerate(prompts, 1):
        if verbose:
            print(f"Processing {position}/{total}...", end=" ")
        record = {
            "index": position,
            "prompt": current_prompt,
            "response": None,
            "status": "error",
            "error": None,
        }
        try:
            record["response"] = process_fn(current_prompt)
        except Exception as exc:
            record["error"] = str(exc)
            if verbose:
                print(f"ERROR: {exc}")
        else:
            record["status"] = "success"
            if verbose:
                print("OK")
        outcomes.append(record)
        # Pause between requests, but not after the final one.
        if position < total:
            time.sleep(delay_between_requests)
    succeeded = sum(1 for r in outcomes if r["status"] == "success")
    if verbose:
        print(f"\nCompleted: {succeeded}/{total} successful")
    return outcomes
# Example usage
client = anthropic.Anthropic()


def classify_sentiment(text: str) -> str:
    """Label text as POSITIVE, NEGATIVE, or NEUTRAL using the fast Haiku tier."""
    result = client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=10,  # One-word label — no need for a larger budget.
        messages=[{
            "role": "user",
            "content": f"Classify the sentiment as POSITIVE, NEGATIVE, or NEUTRAL. Reply with one word only.\n\nText: {text}"
        }]
    )
    return result.content[0].text.strip()


reviews = [
    "This product exceeded all my expectations!",
    "Terrible quality. Would not recommend.",
    "It arrived on time and works as described.",
    "I have mixed feelings — some parts are great, others disappointing.",
]

# Uncomment to run:
# results = process_batch(reviews, classify_sentiment)
# for r in results:
#     print(f"'{r['prompt'][:40]}...' → {r['response']}")
5.4 Simple Conversation Manager Class
import anthropic
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class Message:
    # One conversation turn. role is "user" or "assistant"; timestamp records
    # when the turn was created locally (not when the API processed it).
    role: str
    content: str
    timestamp: datetime = field(default_factory=datetime.now)


class ConversationManager:
    """
    Manages a multi-turn conversation with Claude, including
    history persistence and token usage tracking.
    """

    def __init__(
        self,
        system_prompt: str = "You are a helpful assistant.",
        model: str = "claude-sonnet-4-5",
        max_tokens: int = 2048,
        max_history_messages: int = 20
    ):
        self.client = anthropic.Anthropic()
        self.system_prompt = system_prompt
        self.model = model
        self.max_tokens = max_tokens
        self.max_history_messages = max_history_messages
        self.history: list[Message] = []
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def send(self, user_input: str) -> str:
        """Send a user message and return the assistant's reply."""
        self.history.append(Message(role="user", content=user_input))
        # Trim history to avoid exceeding context limits
        messages_to_send = self.history[-self.max_history_messages:]
        # FIX: after trimming, the window can begin with an assistant turn
        # (an even-sized window over alternating turns ending in the user
        # message just appended starts on "assistant"), and the Messages API
        # requires the first message to be from the user. Drop leading
        # assistant turns so the request stays valid once history exceeds
        # max_history_messages.
        while messages_to_send and messages_to_send[0].role != "user":
            messages_to_send = messages_to_send[1:]
        response = self.client.messages.create(
            model=self.model,
            max_tokens=self.max_tokens,
            system=self.system_prompt,
            messages=[{"role": m.role, "content": m.content} for m in messages_to_send]
        )
        reply = response.content[0].text
        self.history.append(Message(role="assistant", content=reply))
        self.total_input_tokens += response.usage.input_tokens
        self.total_output_tokens += response.usage.output_tokens
        return reply

    def get_stats(self) -> dict:
        """Return usage statistics for the conversation."""
        return {
            "total_turns": len([m for m in self.history if m.role == "user"]),
            "total_messages": len(self.history),
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_tokens": self.total_input_tokens + self.total_output_tokens,
        }

    def reset(self):
        """Clear conversation history while keeping settings."""
        self.history = []
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def print_history(self):
        """Print the conversation in a readable format."""
        for msg in self.history:
            prefix = "You" if msg.role == "user" else "Claude"
            print(f"[{prefix}]: {msg.content}\n")
# Example usage
if __name__ == "__main__":
    tutor_session = ConversationManager(
        system_prompt="You are a concise Python tutor. Keep all code examples under 20 lines.",
        model="claude-sonnet-4-5"
    )
    # Uncomment to run an actual conversation:
    # reply = tutor_session.send("How do I read a CSV file in Python?")
    # print(f"Claude: {reply}\n")
    # reply = tutor_session.send("How do I filter rows where a column value is greater than 100?")
    # print(f"Claude: {reply}\n")
    # print(tutor_session.get_stats())
6. Error Handling Reference
import anthropic
from openai import OpenAI, APIError, RateLimitError, APIConnectionError
def robust_claude_call(prompt: str) -> str | None:
    """Demonstrates comprehensive error handling for the Anthropic API.

    Returns the reply text, or None after printing a diagnostic when any
    known API error occurs. Specific subclasses are caught before the
    generic APIStatusError base class so the most precise message wins.
    """
    client = anthropic.Anthropic()
    try:
        result = client.messages.create(
            model="claude-sonnet-4-5",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
    except anthropic.AuthenticationError:
        print("ERROR: Invalid API key. Check your ANTHROPIC_API_KEY environment variable.")
    except anthropic.RateLimitError as e:
        print(f"ERROR: Rate limit exceeded. Wait before retrying. Details: {e}")
    except anthropic.BadRequestError as e:
        print(f"ERROR: Bad request — likely an issue with prompt structure. Details: {e}")
    except anthropic.APIConnectionError as e:
        print(f"ERROR: Could not connect to Anthropic API. Check your internet connection. Details: {e}")
    except anthropic.APIStatusError as e:
        print(f"ERROR: API returned status {e.status_code}. Message: {e.message}")
    else:
        return result.content[0].text
    return None
def robust_openai_call(prompt: str) -> str | None:
    """Demonstrates comprehensive error handling for the OpenAI API.

    Returns the reply text, or None after printing a diagnostic. The more
    specific RateLimitError and APIConnectionError handlers precede the
    generic APIError handler.
    """
    client = OpenAI()
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}]
        )
    except RateLimitError:
        print("ERROR: OpenAI rate limit exceeded.")
    except APIConnectionError as e:
        print(f"ERROR: Connection failed. Details: {e}")
    except APIError as e:
        print(f"ERROR: OpenAI API error {e.status_code}: {e.message}")
    else:
        return completion.choices[0].message.content
    return None
7. Best Practices for Production Code
"""
Production best practices checklist implemented as code patterns.
"""
import os
import logging
import hashlib
import json
import time
from functools import wraps
from typing import Any
import anthropic
# 1. Configure structured logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s %(message)s"
)
logger = logging.getLogger(__name__)


# 2. Never hardcode API keys — always use environment variables
def get_client() -> anthropic.Anthropic:
    """Build an Anthropic client, failing fast when the key is missing."""
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise EnvironmentError("ANTHROPIC_API_KEY must be set before running this application.")
    return anthropic.Anthropic(api_key=api_key)


# 3. Log all API calls with metadata for debugging and cost tracking
def logged_api_call(func):
    """Decorator: log the duration and outcome of each wrapped API call."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        started = time.time()
        try:
            outcome = func(*args, **kwargs)
        except Exception as e:
            elapsed = time.time() - started
            logger.error("api_call_failed", extra={"function": func.__name__, "error": str(e), "duration_s": round(elapsed, 3)})
            raise
        elapsed = time.time() - started
        logger.info("api_call_success", extra={"function": func.__name__, "duration_s": round(elapsed, 3)})
        return outcome
    return wrapper
# 4. Simple cache to avoid re-calling the API for identical prompts
class SimplePromptCache:
def __init__(self):
self._cache: dict[str, str] = {}
def _key(self, prompt: str, model: str) -> str:
content = f"{model}::{prompt}"
return hashlib.sha256(content.encode()).hexdigest()
def get(self, prompt: str, model: str) -> str | None:
return self._cache.get(self._key(prompt, model))
def set(self, prompt: str, model: str, response: str) -> None:
self._cache[self._key(prompt, model)] = response
def size(self) -> int:
return len(self._cache)
cache = SimplePromptCache()
# 5. Validate and sanitize inputs before sending to the API
def sanitize_prompt(prompt: str, max_length: int = 10_000) -> str:
    """Basic input validation for prompts.

    Rejects non-strings and empty/whitespace-only input; truncates input
    longer than max_length (logging a warning) and strips surrounding
    whitespace from whatever is returned.
    """
    if not isinstance(prompt, str):
        raise TypeError(f"Prompt must be a string, got {type(prompt)}")
    cleaned = prompt.strip()
    if not cleaned:
        raise ValueError("Prompt cannot be empty.")
    original_length = len(prompt)
    if original_length > max_length:
        logger.warning(f"Prompt truncated from {original_length} to {max_length} characters.")
        cleaned = prompt[:max_length].strip()
    return cleaned
# 6. Use environment-specific model selection
def get_model_for_environment() -> str:
    """Use cheaper/faster models in development to reduce costs."""
    running_in_production = os.environ.get("APP_ENV", "development") == "production"
    # Sonnet in production; Haiku everywhere else to keep dev/testing cheap.
    return "claude-sonnet-4-5" if running_in_production else "claude-haiku-4-5"
# 7. Set reasonable timeouts
@logged_api_call
def production_api_call(prompt: str) -> str:
    """End-to-end production call: validate, cache, select model, invoke API.

    Combines the patterns above: env-based client construction, input
    sanitization, environment-aware model choice, response caching, and an
    explicit request timeout.
    """
    client = get_client()
    prompt = sanitize_prompt(prompt)
    model = get_model_for_environment()

    # Serve repeated prompts from memory instead of paying for the API again.
    cached = cache.get(prompt, model)
    if cached:
        logger.info("Cache hit — returning cached response.")
        return cached

    response = client.messages.create(
        model=model,
        max_tokens=2048,
        messages=[{"role": "user", "content": prompt}],
        timeout=30.0  # Seconds — prevents hanging indefinitely
    )
    answer = response.content[0].text
    cache.set(prompt, model, answer)
    logger.info(f"Tokens used: {response.usage.input_tokens} in, {response.usage.output_tokens} out")
    return answer
For the latest model IDs, pricing, and API capabilities, always refer to the official documentation: docs.anthropic.com, platform.openai.com/docs, and ai.google.dev. Model names and pricing change frequently.