Appendix G: Python Analytics Reference
The Python code from Chapter 34, with documentation, usage examples, and extension patterns.
Overview
Chapter 34 introduced a Python analytics system for creators who want to work with their data programmatically. This appendix provides the complete, documented code, usage examples for common analytics tasks, and guidance for extending the system with additional metrics.
Prerequisites: Python 3.10+. No external packages required: the code in this appendix uses only the Python standard library, including the `csv` module for CSV export. pandas is optional and only needed if you extend the export functions to use it.
Complete Code: VideoAnalytics and ChannelDashboard
"""
Creator Analytics System
========================
A lightweight Python system for analyzing YouTube/creator content performance.
Designed to work with manually-entered data or data exported from platform analytics.
Usage:
from creator_analytics import VideoAnalytics, ChannelDashboard
# Create a dashboard and add videos
dashboard = ChannelDashboard("My Science Channel")
video = VideoAnalytics(
title="Why Your Mitochondria Are Lying to You",
views=85000,
watch_time_minutes=720000, # total minutes watched across all views
likes=3200,
comments=410,
shares=890,
saves=1100,
duration_minutes=9.5,
hook_type="question",
topic_category="biology",
publish_date="2024-03-15"
)
dashboard.add_video(video)
dashboard.print_dashboard()
"""
from dataclasses import dataclass, field
from typing import Optional
from datetime import datetime
@dataclass
class VideoAnalytics:
    """
    Store performance data for a single video and compute derived metrics.

    All rate metrics are expressed as percentages of views (CTR is a
    percentage of impressions). Completion rate is capped at 100.
    Growth Score is a weighted composite indicating channel-building potential.

    Raises:
        ValueError: If ``views`` is negative or ``duration_minutes`` is not
            positive (checked once, when the instance is created).
    """

    # Required fields
    title: str
    views: int
    watch_time_minutes: float  # Total minutes watched across all viewers
    likes: int
    comments: int
    shares: int
    saves: int
    duration_minutes: float  # Video length in minutes

    # Optional metadata
    hook_type: str = "unknown"  # question / contrast / bold_claim / demo / story / problem
    topic_category: str = "general"
    publish_date: str = ""  # YYYY-MM-DD format
    impressions: int = 0  # Set if CTR data is available
    clicks: int = 0  # For CTR calculation

    def __post_init__(self):
        """Validate data on creation."""
        if self.views < 0:
            raise ValueError(f"Views cannot be negative: {self.views}")
        if self.duration_minutes <= 0:
            raise ValueError(f"Duration must be positive: {self.duration_minutes}")

    # ─── Core Rate Properties ──────────────────────────────────────────────

    def _percent_of_views(self, count: float) -> float:
        """Return ``count`` as a percentage of views, or 0.0 when there are no views."""
        return (count / self.views * 100) if self.views > 0 else 0.0

    @property
    def completion_rate(self) -> float:
        """
        Average percentage of video watched.

        Formula: (total_minutes / views) / duration_minutes * 100
        The result is capped at 100.0.
        Benchmark: >50% is good for videos under 10 minutes.
        """
        # Fields stay mutable after creation, so keep guarding against a
        # zero divisor here even though __post_init__ validated the duration.
        if self.views == 0 or self.duration_minutes == 0:
            return 0.0
        avg_watch = self.watch_time_minutes / self.views
        return min((avg_watch / self.duration_minutes) * 100, 100.0)

    @property
    def like_rate(self) -> float:
        """Likes as percentage of views. Typical range: 1-5%."""
        return self._percent_of_views(self.likes)

    @property
    def comment_rate(self) -> float:
        """Comments as percentage of views. Typical range: 0.1-2%."""
        return self._percent_of_views(self.comments)

    @property
    def share_rate(self) -> float:
        """
        Shares as percentage of views. The highest-value engagement signal.

        Sharing requires active effort and social risk.
        Typical range: 0.5-3%. Above 3% = strong viral potential.
        """
        return self._percent_of_views(self.shares)

    @property
    def save_rate(self) -> float:
        """
        Saves as percentage of views. Indicates content worth returning to.

        Typical range: 0.5-2%. High save rate = strong evergreen signal.
        """
        return self._percent_of_views(self.saves)

    @property
    def engagement_rate(self) -> float:
        """
        Combined engagement (likes + comments + shares + saves) as % of views.

        Composite measure of overall audience investment.
        """
        total = self.likes + self.comments + self.shares + self.saves
        return self._percent_of_views(total)

    @property
    def ctr(self) -> Optional[float]:
        """
        Click-through rate: clicks / impressions * 100.

        Returns None if impressions data is not available (impressions == 0).
        A value of 0.0 means impressions were recorded but nobody clicked.
        Typical range: 2-10%. Below 2% = packaging problem.
        """
        if self.impressions == 0:
            return None
        return self.clicks / self.impressions * 100

    # ─── Growth Score ──────────────────────────────────────────────────────

    @property
    def growth_score(self) -> float:
        """
        Composite metric weighting share rate, save rate, and engagement rate
        to indicate whether content is building the channel (spreading to new
        viewers) vs. serving existing viewers.

        Formula: (share_rate × 2.0) + (save_rate × 1.5) + engagement_rate

        Interpretation:
        - Below 5: Content is performing but not actively growing channel
        - 5-10: Healthy growth signal
        - Above 10: Strong growth signal; potential viral spread

        Shares weighted highest: direct mechanism for reaching new viewers.
        Saves weighted second: signals durable value, improves algorithmic distribution.
        Engagement weighted third: general interest signal.
        """
        return (self.share_rate * 2.0) + (self.save_rate * 1.5) + (self.engagement_rate * 1.0)

    # ─── Formatting ───────────────────────────────────────────────────────

    def summary(self) -> dict:
        """
        Return all metrics as a dictionary for easy access and export.

        Rates are rounded for display. ``ctr`` is None only when no
        impressions data exists; a genuine 0% CTR is reported as 0.0.
        """
        # Compare against None explicitly: a plain truthiness test would
        # turn a legitimate 0.0 CTR into "no data".
        ctr = self.ctr
        return {
            "title": self.title,
            "views": self.views,
            "completion_rate": round(self.completion_rate, 1),
            "like_rate": round(self.like_rate, 2),
            "comment_rate": round(self.comment_rate, 2),
            "share_rate": round(self.share_rate, 2),
            "save_rate": round(self.save_rate, 2),
            "engagement_rate": round(self.engagement_rate, 2),
            "growth_score": round(self.growth_score, 2),
            "ctr": round(ctr, 1) if ctr is not None else None,
            "hook_type": self.hook_type,
            "topic_category": self.topic_category,
            "publish_date": self.publish_date,
        }

    def __repr__(self) -> str:
        """Return a compact one-line summary: title, views, completion, growth score."""
        return (
            f"VideoAnalytics(title={self.title!r}, views={self.views:,}, "
            f"completion={self.completion_rate:.1f}%, "
            f"growth_score={self.growth_score:.2f})"
        )
class ChannelDashboard:
    """
    Aggregate VideoAnalytics objects and produce channel-level analysis.

    Use this to:
    - Track performance trends over time
    - Compare content by hook type, topic category, or other metadata
    - Identify your highest and lowest performers
    - Understand what types of content drive channel growth

    Metrics are looked up by attribute name on each video. A video whose
    value for the requested metric is None (for example ``ctr`` without
    impressions data) is left out of averages, rankings, and trends.
    """

    def __init__(self, channel_name: str):
        """Create an empty dashboard labelled with ``channel_name``."""
        self.channel_name = channel_name
        self._videos: list[VideoAnalytics] = []

    def add_video(self, video: VideoAnalytics) -> None:
        """Add a video to the dashboard."""
        self._videos.append(video)

    def add_videos(self, videos: list[VideoAnalytics]) -> None:
        """Add multiple videos at once."""
        self._videos.extend(videos)

    @property
    def videos(self) -> list[VideoAnalytics]:
        """All videos: dated ones newest first, followed by undated ones in insertion order."""
        dated = [v for v in self._videos if v.publish_date]
        undated = [v for v in self._videos if not v.publish_date]
        # Dates are compared as strings, which orders correctly for YYYY-MM-DD.
        return sorted(dated, key=lambda v: v.publish_date, reverse=True) + undated

    # ─── Channel-Level Aggregates ──────────────────────────────────────────

    def _with_metric(self, metric: str) -> list[VideoAnalytics]:
        """Return the videos (insertion order) whose ``metric`` value is not None."""
        return [v for v in self._videos if getattr(v, metric) is not None]

    def _avg(self, metric: str) -> float:
        """
        Average a VideoAnalytics attribute across all videos that have a value.

        Returns 0.0 when no video has a value for ``metric``.
        """
        values = [getattr(v, metric) for v in self._with_metric(metric)]
        if not values:
            return 0.0
        return sum(values) / len(values)

    @property
    def avg_completion_rate(self) -> float:
        """Mean completion rate across all videos."""
        return self._avg("completion_rate")

    @property
    def avg_share_rate(self) -> float:
        """Mean share rate across all videos."""
        return self._avg("share_rate")

    @property
    def avg_save_rate(self) -> float:
        """Mean save rate across all videos."""
        return self._avg("save_rate")

    @property
    def avg_engagement_rate(self) -> float:
        """Mean engagement rate across all videos."""
        return self._avg("engagement_rate")

    @property
    def avg_growth_score(self) -> float:
        """Mean growth score across all videos."""
        return self._avg("growth_score")

    @property
    def total_views(self) -> int:
        """Sum of views over every video in the dashboard."""
        return sum(v.views for v in self._videos)

    # ─── Sorting and Filtering ─────────────────────────────────────────────

    def top_by(self, metric: str, n: int = 5) -> list[VideoAnalytics]:
        """Return the top n videos by any metric, skipping videos with no value for it."""
        ranked = sorted(
            self._with_metric(metric),
            key=lambda v: getattr(v, metric),
            reverse=True,
        )
        return ranked[:n]

    def bottom_by(self, metric: str, n: int = 5) -> list[VideoAnalytics]:
        """Return the bottom n videos by any metric, skipping videos with no value for it."""
        ranked = sorted(self._with_metric(metric), key=lambda v: getattr(v, metric))
        return ranked[:n]

    # ─── Group Analysis ────────────────────────────────────────────────────

    @staticmethod
    def _group_stats(videos: list) -> dict:
        """Summarize one non-empty group of videos as rounded averages plus a count."""
        count = len(videos)
        return {
            "count": count,
            "avg_completion": round(sum(v.completion_rate for v in videos) / count, 1),
            "avg_share_rate": round(sum(v.share_rate for v in videos) / count, 2),
            "avg_growth_score": round(sum(v.growth_score for v in videos) / count, 2),
            "avg_views": int(sum(v.views for v in videos) / count),
        }

    def _grouped_stats(self, attribute: str, fallback: str) -> dict[str, dict]:
        """Group videos by a metadata attribute (empty values use ``fallback``) and summarize each group."""
        groups: dict[str, list[VideoAnalytics]] = {}
        for video in self._videos:
            key = getattr(video, attribute) or fallback
            groups.setdefault(key, []).append(video)
        return {key: self._group_stats(members) for key, members in groups.items()}

    def by_hook_type(self) -> dict[str, dict]:
        """
        Compare performance across different hook types.

        Returns dict: {hook_type: {metric: average_value}} with the keys
        count, avg_completion, avg_share_rate, avg_growth_score, avg_views.
        Videos with an empty hook type are grouped under "unknown".

        Use this to determine which hook types work best for your channel.
        Minimum ~5 videos per hook type for meaningful comparison.
        """
        return self._grouped_stats("hook_type", "unknown")

    def by_topic(self) -> dict[str, dict]:
        """
        Compare performance across topic categories.

        Same structure as by_hook_type(). Videos with an empty topic
        category are grouped under "general".
        """
        return self._grouped_stats("topic_category", "general")

    def trend_over_time(self, metric: str = "growth_score") -> list[dict]:
        """
        Return metric values in chronological order (oldest first).

        Use to visualize whether a metric is improving over time.
        Undated videos, and videos with no value for the metric, are omitted.

        Args:
            metric: Any VideoAnalytics property name (default: "growth_score")

        Returns:
            List of {date, title, value} dicts, sorted by date. Titles are
            truncated to 40 characters and values rounded to 2 decimals.
        """
        dated = [
            v for v in self._videos
            if v.publish_date and getattr(v, metric) is not None
        ]
        sorted_videos = sorted(dated, key=lambda v: v.publish_date)
        return [
            {
                "date": v.publish_date,
                "title": v.title[:40],
                "value": round(getattr(v, metric), 2),
            }
            for v in sorted_videos
        ]

    # ─── Dashboard Output ──────────────────────────────────────────────────

    def print_dashboard(self) -> None:
        """Print a formatted analytics summary to the console."""
        n = len(self._videos)
        if n == 0:
            print(f"{self.channel_name}: No videos loaded.")
            return
        print(f"\n{'='*60}")
        print(f" {self.channel_name} — Analytics Dashboard")
        print(f" {n} videos | {self.total_views:,} total views")
        print(f"{'='*60}")
        print(f"\n CHANNEL AVERAGES")
        print(f" {'Completion Rate:':<22} {self.avg_completion_rate:.1f}%")
        print(f" {'Share Rate:':<22} {self.avg_share_rate:.2f}%")
        print(f" {'Save Rate:':<22} {self.avg_save_rate:.2f}%")
        print(f" {'Engagement Rate:':<22} {self.avg_engagement_rate:.2f}%")
        print(f" {'Growth Score:':<22} {self.avg_growth_score:.2f}")
        print(f"\n TOP 3 BY GROWTH SCORE")
        for i, v in enumerate(self.top_by("growth_score", 3), 1):
            print(f" {i}. {v.title[:45]:<45} | Score: {v.growth_score:.2f}")
        print(f"\n TOP 3 BY COMPLETION RATE")
        for i, v in enumerate(self.top_by("completion_rate", 3), 1):
            print(f" {i}. {v.title[:45]:<45} | {v.completion_rate:.1f}%")
        print(f"\n BOTTOM 3 BY COMPLETION RATE")
        for i, v in enumerate(self.bottom_by("completion_rate", 3), 1):
            print(f" {i}. {v.title[:45]:<45} | {v.completion_rate:.1f}%")
        print(f"{'='*60}\n")

    def export_csv(self, filename: str = "channel_analytics.csv") -> None:
        """
        Export all video analytics to a CSV file.

        Writes one row per video (newest first, undated last) using the keys
        of ``summary()`` as columns. Prints a message and writes nothing when
        the dashboard is empty.
        """
        if not self._videos:
            print("No videos to export.")
            return
        import csv

        fieldnames = list(self._videos[0].summary().keys())
        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for video in self.videos:
                writer.writerow(video.summary())
        print(f"Exported {len(self._videos)} videos to {filename}")
Usage Examples
Basic Setup: Adding Your First Videos
from creator_analytics import VideoAnalytics, ChannelDashboard
# Describe each video with your real data from YouTube Studio
mitochondria_video = VideoAnalytics(
    title="Why Your Mitochondria Are Lying to You",
    views=85000,
    watch_time_minutes=720000,
    likes=3200,
    comments=410,
    shares=890,
    saves=1100,
    duration_minutes=9.5,
    hook_type="question",
    topic_category="cell_biology",
    publish_date="2024-03-15",
)
dna_race_video = VideoAnalytics(
    title="The DNA Discovery Race (feat. History Channel)",
    views=142000,
    watch_time_minutes=1010000,
    likes=6800,
    comments=920,
    shares=2400,
    saves=1800,
    duration_minutes=8.2,
    hook_type="story",
    topic_category="history_of_science",
    publish_date="2024-04-02",
)
disagreement_video = VideoAnalytics(
    title="Why Scientists Disagree (And Why That's Good)",
    views=68000,
    watch_time_minutes=540000,
    likes=4100,
    comments=680,
    shares=1200,
    saves=900,
    duration_minutes=11.3,
    hook_type="bold_claim",
    topic_category="meta_science",
    publish_date="2024-04-18",
)

# Load them into a dashboard in one call, then print the channel summary
dashboard = ChannelDashboard("Marcus Explains Science")
dashboard.add_videos([mitochondria_video, dna_race_video, disagreement_video])
dashboard.print_dashboard()
Comparing Hook Types
hook_analysis = dashboard.by_hook_type()

# Strongest average growth score first
ranked_hooks = sorted(
    hook_analysis.items(),
    key=lambda item: item[1]["avg_growth_score"],
    reverse=True,
)

print("\nHOOK TYPE COMPARISON")
print(f"{'Hook Type':<15} {'Count':<6} {'Completion':<12} {'Share Rate':<12} {'Growth Score'}")
print("-" * 65)
for hook_name, hook_stats in ranked_hooks:
    left_columns = (
        f"{hook_name:<15} {hook_stats['count']:<6} "
        f"{hook_stats['avg_completion']:<12.1f} "
    )
    right_columns = (
        f"{hook_stats['avg_share_rate']:<12.2f} "
        f"{hook_stats['avg_growth_score']:.2f}"
    )
    print(left_columns + right_columns)
Tracking Improvement Over Time
trend = dashboard.trend_over_time("completion_rate")
print("\nCOMPLETION RATE OVER TIME")
for point in trend:
    percent = point["value"]
    # One bar character per two percentage points (50% = 25 chars)
    print(f"{point['date']} {percent:>5.1f}% {'█' * int(percent / 2)}")
Finding What's Holding You Back
# Lowest-completion videos are the first candidates for hook or pacing improvement
print("\nVIDEOS NEEDING HOOK/PACING REVIEW (bottom 3 by completion)")
weakest_completion = dashboard.bottom_by("completion_rate", 3)
for video in weakest_completion:
    completion_details = (
        f" Completion: {video.completion_rate:.1f}% | "
        f"Hook type: {video.hook_type} | "
        f"Duration: {video.duration_minutes:.1f} min"
    )
    print(f"\n {video.title}")
    print(completion_details)

# Highest share rates show which content is driving organic spread
print("\nMOST SHAREABLE CONTENT (top 3 by share rate)")
most_shared = dashboard.top_by("share_rate", 3)
for video in most_shared:
    share_details = (
        f" Share rate: {video.share_rate:.2f}% | "
        f"Growth score: {video.growth_score:.2f} | "
        f"Topic: {video.topic_category}"
    )
    print(f"\n {video.title}")
    print(share_details)
Extending the System
Adding Custom Metrics
@dataclass
class VideoAnalyticsExtended(VideoAnalytics):
    """VideoAnalytics with extra tracking fields for specific use cases."""

    # Extra fields; add your own tracking needs alongside these
    sponsor_integrated: bool = False
    collaboration_partner: str = ""
    thumbnail_version: str = "A"  # A/B testing tracker

    @property
    def sponsor_engagement_lift(self) -> Optional[float]:
        """
        Placeholder for comparing this video's engagement to the channel
        average when tracking sponsored vs. non-sponsored performance.

        A negative lift would mean sponsored content underperforms, which
        is worth examining. Until the comparison is implemented this
        always returns None.
        """
        lift: Optional[float] = None  # Implement comparison to dashboard average
        return lift
Importing from YouTube Studio Export
YouTube Studio allows exporting analytics to CSV. To use your exported data:
import csv
def load_from_youtube_export(
    csv_path: str,
    default_duration_minutes: float = 10.0,
) -> "list[VideoAnalytics]":
    """
    Load videos from a YouTube Studio table export.

    Maps column names from YouTube's export format to VideoAnalytics fields.
    YouTube Studio CSV columns (as of 2024):
    - 'Video title' → title
    - 'Views' → views
    - 'Watch time (hours)' → watch_time_minutes (multiply by 60)
    - 'Likes' → likes
    - 'Comments' → comments
    - 'Shares' → shares
    - 'Saves' (if available) → saves

    Args:
        csv_path: Path to the exported CSV file.
        default_duration_minutes: Duration assigned to every video, because
            the export has no duration column. Correct it per video
            afterwards for meaningful completion rates.

    Returns:
        A list of VideoAnalytics objects, one per row that parsed cleanly.
        Missing or empty numeric cells count as 0, and thousands separators
        are stripped. Rows with unparseable numbers, or values rejected by
        VideoAnalytics validation, are reported and skipped.
    """

    def _parse(row: dict, column: str, cast):
        """Convert one cell with ``cast``, treating a missing or blank cell as zero."""
        raw = row.get(column)
        # DictReader yields None for cells missing from a short row, and the
        # column may be absent altogether, so never assume a string here.
        text = "" if raw is None else str(raw).replace(",", "").strip()
        return cast(text) if text else cast(0)

    videos = []
    # utf-8-sig reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first column name.
    # newline="" is what the csv module expects for files it reads.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            try:
                title = row.get("Video title")
                video = VideoAnalytics(
                    title="Unknown" if title is None else title,
                    views=_parse(row, "Views", int),
                    watch_time_minutes=_parse(row, "Watch time (hours)", float) * 60,
                    likes=_parse(row, "Likes", int),
                    comments=_parse(row, "Comments", int),
                    shares=_parse(row, "Shares", int),
                    saves=_parse(row, "Saves", int),
                    duration_minutes=default_duration_minutes,  # Not in YouTube export
                )
            except (ValueError, KeyError) as e:
                print(f"Skipping row due to error: {e}")
            else:
                videos.append(video)
    return videos
Growth Score Interpretation Reference
| Growth Score | What It Suggests | Common Causes |
|---|---|---|
| Below 3 | Low growth signal | Low shares, low engagement; content serves existing viewers but doesn't spread |
| 3–6 | Moderate growth signal | Decent engagement; some sharing; typical for established channels in stable niches |
| 6–10 | Strong growth signal | High share rate; high engagement; content reaching new audiences regularly |
| Above 10 | Very strong growth signal | Exceptional share rate; viral potential; new-audience discovery high |
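Note: Interpret growth score relative to your own channel's baseline, not these absolute ranges. What matters is whether your score is improving over time and which videos score highest — those patterns are specific to your channel and audience. (The `growth_score` docstring in the code uses three coarser bands — below 5, 5–10, and above 10; both sets of ranges are rough guides, not fixed thresholds.)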