Appendix G: Python Analytics Reference

The Python code from Chapter 34, with documentation, usage examples, and extension patterns.


Overview

Chapter 34 introduced a Python analytics system for creators who want to work with their data programmatically. This appendix provides the complete, documented code, usage examples for common analytics tasks, and guidance for extending the system with additional metrics.

Prerequisites: Python 3.10+. No external packages required: everything in this appendix uses only the Python standard library. pandas is not needed; the CSV export function uses the built-in csv module.


Complete Code: VideoAnalytics and ChannelDashboard

"""
Creator Analytics System
========================
A lightweight Python system for analyzing YouTube/creator content performance.
Designed to work with manually-entered data or data exported from platform analytics.

Usage:
    from creator_analytics import VideoAnalytics, ChannelDashboard

    # Create a dashboard and add videos
    dashboard = ChannelDashboard("My Science Channel")

    video = VideoAnalytics(
        title="Why Your Mitochondria Are Lying to You",
        views=85000,
        watch_time_minutes=720000,  # total minutes watched across all views
        likes=3200,
        comments=410,
        shares=890,
        saves=1100,
        duration_minutes=9.5,
        hook_type="question",
        topic_category="biology",
        publish_date="2024-03-15"
    )

    dashboard.add_video(video)
    dashboard.print_dashboard()
"""

from dataclasses import dataclass, field
from typing import Optional
from datetime import datetime


@dataclass
class VideoAnalytics:
    """
    Stores performance data for a single video and computes derived metrics.

    All rate metrics are expressed as percentages (0-100).
    Growth Score is a weighted composite indicating channel-building potential.

    Raises:
        ValueError: On creation, if views, watch time, or any engagement /
            impression count is negative, or if duration is not positive.
    """

    # Required fields
    title: str
    views: int
    watch_time_minutes: float      # Total minutes watched across all viewers
    likes: int
    comments: int
    shares: int
    saves: int
    duration_minutes: float        # Video length in minutes

    # Optional metadata
    hook_type: str = "unknown"     # question / contrast / bold_claim / demo / story / problem
    topic_category: str = "general"
    publish_date: str = ""         # YYYY-MM-DD format
    impressions: int = 0           # Set if CTR data is available
    clicks: int = 0                # For CTR calculation

    def __post_init__(self):
        """Validate data on creation."""
        if self.views < 0:
            raise ValueError(f"Views cannot be negative: {self.views}")
        if self.duration_minutes <= 0:
            raise ValueError(f"Duration must be positive: {self.duration_minutes}")
        # Negative counts would silently produce negative rates and scores,
        # so reject them up front like negative views.
        for name in (
            "watch_time_minutes",
            "likes",
            "comments",
            "shares",
            "saves",
            "impressions",
            "clicks",
        ):
            value = getattr(self, name)
            if value < 0:
                raise ValueError(f"{name} cannot be negative: {value}")

    def _pct_of_views(self, count: float) -> float:
        """Return count as a percentage of views, or 0.0 when there are no views."""
        return (count / self.views * 100) if self.views > 0 else 0.0

    # ─── Core Rate Properties ──────────────────────────────────────────────

    @property
    def completion_rate(self) -> float:
        """
        Average percentage of video watched.
        Formula: (total_minutes / views) / duration_minutes * 100
        The result is capped at 100.
        Benchmark: >50% is good for videos under 10 minutes.
        """
        # duration_minutes is validated on creation, but the dataclass is
        # mutable, so guard against a later assignment of 0 as well.
        if self.views == 0 or self.duration_minutes == 0:
            return 0.0
        avg_watch = self.watch_time_minutes / self.views
        return min((avg_watch / self.duration_minutes) * 100, 100.0)

    @property
    def like_rate(self) -> float:
        """Likes as percentage of views. Typical range: 1-5%."""
        return self._pct_of_views(self.likes)

    @property
    def comment_rate(self) -> float:
        """Comments as percentage of views. Typical range: 0.1-2%."""
        return self._pct_of_views(self.comments)

    @property
    def share_rate(self) -> float:
        """
        Shares as percentage of views. The highest-value engagement signal.
        Sharing requires active effort and social risk.
        Typical range: 0.5-3%. Above 3% = strong viral potential.
        """
        return self._pct_of_views(self.shares)

    @property
    def save_rate(self) -> float:
        """
        Saves as percentage of views. Indicates content worth returning to.
        Typical range: 0.5-2%. High save rate = strong evergreen signal.
        """
        return self._pct_of_views(self.saves)

    @property
    def engagement_rate(self) -> float:
        """
        Combined engagement (likes + comments + shares + saves) as % of views.
        Composite measure of overall audience investment.
        """
        total = self.likes + self.comments + self.shares + self.saves
        return self._pct_of_views(total)

    @property
    def ctr(self) -> Optional[float]:
        """
        Click-through rate: clicks / impressions * 100.
        Returns None if impressions data not available (impressions == 0).
        Typical range: 2-10%. Below 2% = packaging problem.
        """
        if self.impressions == 0:
            return None
        return (self.clicks / self.impressions * 100)

    # ─── Growth Score ──────────────────────────────────────────────────────

    @property
    def growth_score(self) -> float:
        """
        Composite metric weighting share rate, save rate, and engagement rate
        to indicate whether content is building the channel (spreading to new
        viewers) vs. serving existing viewers.

        Formula: (share_rate × 2.0) + (save_rate × 1.5) + engagement_rate

        Interpretation (rough guide; compare against your own channel's
        baseline rather than treating these as absolute cut-offs):
        - Below 3: Low growth signal
        - 3-6: Moderate growth signal
        - 6-10: Strong growth signal
        - Above 10: Very strong growth signal; potential viral spread

        Shares weighted highest: direct mechanism for reaching new viewers.
        Saves weighted second: signals durable value, improves algorithmic distribution.
        Engagement weighted third: general interest signal.
        """
        return (self.share_rate * 2.0) + (self.save_rate * 1.5) + (self.engagement_rate * 1.0)

    # ─── Formatting ───────────────────────────────────────────────────────

    def summary(self) -> dict:
        """
        Return all metrics as a dictionary for easy access and export.

        Rates are rounded for display. "ctr" is None only when no impressions
        data is available; a genuine CTR of 0.0 is reported as 0.0.
        """
        # Compare against None explicitly: a truthiness test would turn a
        # real 0.0% click-through rate into "no data".
        ctr = self.ctr
        return {
            "title": self.title,
            "views": self.views,
            "completion_rate": round(self.completion_rate, 1),
            "like_rate": round(self.like_rate, 2),
            "comment_rate": round(self.comment_rate, 2),
            "share_rate": round(self.share_rate, 2),
            "save_rate": round(self.save_rate, 2),
            "engagement_rate": round(self.engagement_rate, 2),
            "growth_score": round(self.growth_score, 2),
            "ctr": round(ctr, 1) if ctr is not None else None,
            "hook_type": self.hook_type,
            "topic_category": self.topic_category,
            "publish_date": self.publish_date,
        }

    def __repr__(self) -> str:
        """Compact representation: title, views, completion rate, growth score."""
        return (
            f"VideoAnalytics(title={self.title!r}, views={self.views:,}, "
            f"completion={self.completion_rate:.1f}%, "
            f"growth_score={self.growth_score:.2f})"
        )


class ChannelDashboard:
    """
    Aggregates VideoAnalytics objects and produces channel-level analysis.

    Use this to:
    - Track performance trends over time
    - Compare content by hook type, topic category, or other metadata
    - Identify your highest and lowest performers
    - Understand what types of content drive channel growth

    Some metrics can be unavailable for a video (``ctr`` is None when no
    impressions were recorded). Averages, rankings, and trends skip such
    videos instead of failing.
    """

    def __init__(self, channel_name: str):
        """Create an empty dashboard for the channel called channel_name."""
        self.channel_name = channel_name
        self._videos: list[VideoAnalytics] = []

    def add_video(self, video: VideoAnalytics) -> None:
        """Add a video to the dashboard."""
        self._videos.append(video)

    def add_videos(self, videos: list[VideoAnalytics]) -> None:
        """Add multiple videos at once."""
        self._videos.extend(videos)

    @property
    def videos(self) -> list[VideoAnalytics]:
        """All videos, sorted by publish date (newest first) if dates available.

        Undated videos follow the dated ones, in insertion order. Dates are
        compared as strings, which orders correctly for YYYY-MM-DD values.
        """
        dated = [v for v in self._videos if v.publish_date]
        undated = [v for v in self._videos if not v.publish_date]
        return sorted(dated, key=lambda v: v.publish_date, reverse=True) + undated

    # ─── Channel-Level Aggregates ──────────────────────────────────────────

    def _avg(self, metric: str) -> float:
        """
        Calculate average of any VideoAnalytics property across all videos.

        Videos whose value for the metric is None are left out of the
        average. Returns 0.0 when no video has a value.
        """
        values = [getattr(v, metric) for v in self._videos]
        available = [value for value in values if value is not None]
        if not available:
            return 0.0
        return sum(available) / len(available)

    @property
    def avg_completion_rate(self) -> float:
        """Mean completion rate (%) across all videos."""
        return self._avg("completion_rate")

    @property
    def avg_share_rate(self) -> float:
        """Mean share rate (%) across all videos."""
        return self._avg("share_rate")

    @property
    def avg_save_rate(self) -> float:
        """Mean save rate (%) across all videos."""
        return self._avg("save_rate")

    @property
    def avg_engagement_rate(self) -> float:
        """Mean engagement rate (%) across all videos."""
        return self._avg("engagement_rate")

    @property
    def avg_growth_score(self) -> float:
        """Mean growth score across all videos."""
        return self._avg("growth_score")

    @property
    def total_views(self) -> int:
        """Sum of views across all videos."""
        return sum(v.views for v in self._videos)

    # ─── Sorting and Filtering ─────────────────────────────────────────────

    def _ranked(self, metric: str, descending: bool) -> list[VideoAnalytics]:
        """Return videos that have a value for metric, ordered by that value."""
        scored = [(getattr(v, metric), v) for v in self._videos]
        # None cannot be ordered against numbers, so drop videos lacking the metric.
        scored = [pair for pair in scored if pair[0] is not None]
        scored.sort(key=lambda pair: pair[0], reverse=descending)
        return [video for _, video in scored]

    def top_by(self, metric: str, n: int = 5) -> list[VideoAnalytics]:
        """Return top n videos by any metric (videos lacking the metric are skipped)."""
        return self._ranked(metric, descending=True)[:n]

    def bottom_by(self, metric: str, n: int = 5) -> list[VideoAnalytics]:
        """Return bottom n videos by any metric (videos lacking the metric are skipped)."""
        return self._ranked(metric, descending=False)[:n]

    # ─── Group Analysis ────────────────────────────────────────────────────

    @staticmethod
    def _group_stats(videos: list) -> dict:
        """Summarize one non-empty group of videos as rounded averages."""
        count = len(videos)
        return {
            "count": count,
            "avg_completion": round(sum(v.completion_rate for v in videos) / count, 1),
            "avg_share_rate": round(sum(v.share_rate for v in videos) / count, 2),
            "avg_growth_score": round(sum(v.growth_score for v in videos) / count, 2),
            "avg_views": int(sum(v.views for v in videos) / count),
        }

    def _grouped_stats(self, attribute: str, fallback: str) -> dict[str, dict]:
        """Group videos by a metadata attribute and summarize each group."""
        groups: dict[str, list] = {}
        for video in self._videos:
            # Empty metadata falls into the fallback bucket.
            key = getattr(video, attribute) or fallback
            groups.setdefault(key, []).append(video)
        return {key: self._group_stats(members) for key, members in groups.items()}

    def by_hook_type(self) -> dict[str, dict]:
        """
        Compare performance across different hook types.

        Returns dict: {hook_type: {metric: average_value}} with keys
        "count", "avg_completion", "avg_share_rate", "avg_growth_score",
        and "avg_views". Videos with an empty hook type are grouped under
        "unknown".

        Use this to determine which hook types work best for your channel.
        Minimum ~5 videos per hook type for meaningful comparison.
        """
        return self._grouped_stats("hook_type", "unknown")

    def by_topic(self) -> dict[str, dict]:
        """
        Compare performance across topic categories.
        Same structure as by_hook_type(). Videos with an empty topic
        category are grouped under "general".
        """
        return self._grouped_stats("topic_category", "general")

    def trend_over_time(self, metric: str = "growth_score") -> list[dict]:
        """
        Return metric values in chronological order (oldest first).
        Use to visualize whether a metric is improving over time.

        Videos without a publish date, or without a value for the metric
        (None), are omitted.

        Args:
            metric: Any VideoAnalytics property name (default: "growth_score")

        Returns:
            List of {date, title, value} dicts, sorted by date. Titles are
            truncated to 40 characters and values rounded to 2 decimals.
        """
        dated = [v for v in self._videos if v.publish_date]
        points = []
        for video in sorted(dated, key=lambda v: v.publish_date):
            value = getattr(video, metric)
            if value is None:
                continue
            points.append(
                {
                    "date": video.publish_date,
                    "title": video.title[:40],
                    "value": round(value, 2),
                }
            )
        return points

    # ─── Dashboard Output ──────────────────────────────────────────────────

    def print_dashboard(self) -> None:
        """Print a formatted analytics summary to the console."""
        n = len(self._videos)
        if n == 0:
            print(f"{self.channel_name}: No videos loaded.")
            return

        print(f"\n{'='*60}")
        print(f"  {self.channel_name} — Analytics Dashboard")
        print(f"  {n} videos | {self.total_views:,} total views")
        print(f"{'='*60}")

        print("\n  CHANNEL AVERAGES")
        print(f"  {'Completion Rate:':<22} {self.avg_completion_rate:.1f}%")
        print(f"  {'Share Rate:':<22} {self.avg_share_rate:.2f}%")
        print(f"  {'Save Rate:':<22} {self.avg_save_rate:.2f}%")
        print(f"  {'Engagement Rate:':<22} {self.avg_engagement_rate:.2f}%")
        print(f"  {'Growth Score:':<22} {self.avg_growth_score:.2f}")

        print("\n  TOP 3 BY GROWTH SCORE")
        for i, v in enumerate(self.top_by("growth_score", 3), 1):
            print(f"  {i}. {v.title[:45]:<45} | Score: {v.growth_score:.2f}")

        print("\n  TOP 3 BY COMPLETION RATE")
        for i, v in enumerate(self.top_by("completion_rate", 3), 1):
            print(f"  {i}. {v.title[:45]:<45} | {v.completion_rate:.1f}%")

        print("\n  BOTTOM 3 BY COMPLETION RATE")
        for i, v in enumerate(self.bottom_by("completion_rate", 3), 1):
            print(f"  {i}. {v.title[:45]:<45} | {v.completion_rate:.1f}%")

        print(f"{'='*60}\n")

    def export_csv(self, filename: str = "channel_analytics.csv") -> None:
        """
        Export all video analytics to a CSV file.

        Writes one row per video (newest first, undated last) using the keys
        of VideoAnalytics.summary() as columns. Prints a message and writes
        nothing when the dashboard is empty.
        """
        if not self._videos:
            print("No videos to export.")
            return

        import csv
        fieldnames = list(self._videos[0].summary().keys())

        # newline="" lets the csv module control line endings itself.
        with open(filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for video in self.videos:
                writer.writerow(video.summary())

        print(f"Exported {len(self._videos)} videos to {filename}")

Usage Examples

Basic Setup: Adding Your First Videos

from creator_analytics import ChannelDashboard, VideoAnalytics

# Describe each video with the numbers reported for it in YouTube Studio.
mitochondria_video = VideoAnalytics(
    title="Why Your Mitochondria Are Lying to You",
    views=85000,
    watch_time_minutes=720000,
    likes=3200,
    comments=410,
    shares=890,
    saves=1100,
    duration_minutes=9.5,
    hook_type="question",
    topic_category="cell_biology",
    publish_date="2024-03-15",
)

dna_race_video = VideoAnalytics(
    title="The DNA Discovery Race (feat. History Channel)",
    views=142000,
    watch_time_minutes=1010000,
    likes=6800,
    comments=920,
    shares=2400,
    saves=1800,
    duration_minutes=8.2,
    hook_type="story",
    topic_category="history_of_science",
    publish_date="2024-04-02",
)

disagreement_video = VideoAnalytics(
    title="Why Scientists Disagree (And Why That's Good)",
    views=68000,
    watch_time_minutes=540000,
    likes=4100,
    comments=680,
    shares=1200,
    saves=900,
    duration_minutes=11.3,
    hook_type="bold_claim",
    topic_category="meta_science",
    publish_date="2024-04-18",
)

# Load everything into one dashboard and print the channel summary.
dashboard = ChannelDashboard("Marcus Explains Science")
dashboard.add_videos([mitochondria_video, dna_race_video, disagreement_video])

dashboard.print_dashboard()

Comparing Hook Types

hook_analysis = dashboard.by_hook_type()

# Rank hook types from strongest to weakest average growth score.
ranked_hooks = sorted(
    hook_analysis.items(),
    key=lambda item: item[1]["avg_growth_score"],
    reverse=True,
)

print("\nHOOK TYPE COMPARISON")
print(f"{'Hook Type':<15} {'Count':<6} {'Completion':<12} {'Share Rate':<12} {'Growth Score'}")
print("-" * 65)
for hook, stats in ranked_hooks:
    # One table row per hook type, aligned with the header columns above.
    row = (
        f"{hook:<15} {stats['count']:<6} {stats['avg_completion']:<12.1f} "
        f"{stats['avg_share_rate']:<12.2f} {stats['avg_growth_score']:.2f}"
    )
    print(row)

Tracking Improvement Over Time

trend = dashboard.trend_over_time("completion_rate")

print("\nCOMPLETION RATE OVER TIME")
for point in trend:
    rate = point["value"]
    # One block character per two percentage points, so 50% draws 25 characters.
    bar = "█" * int(rate / 2)
    print(f"{point['date']}  {rate:>5.1f}%  {bar}")

Finding What's Holding You Back

# Lowest completion rates first: these videos are the ones to review for
# hook or pacing problems.
weakest_by_completion = dashboard.bottom_by("completion_rate", 3)
print("\nVIDEOS NEEDING HOOK/PACING REVIEW (bottom 3 by completion)")
for video in weakest_by_completion:
    details = (
        f"  Completion: {video.completion_rate:.1f}% | "
        f"Hook type: {video.hook_type} | "
        f"Duration: {video.duration_minutes:.1f} min"
    )
    print(f"\n  {video.title}")
    print(details)

# Highest share rates: the content that is driving organic spread.
most_shared = dashboard.top_by("share_rate", 3)
print("\nMOST SHAREABLE CONTENT (top 3 by share rate)")
for video in most_shared:
    details = (
        f"  Share rate: {video.share_rate:.2f}% | "
        f"Growth score: {video.growth_score:.2f} | "
        f"Topic: {video.topic_category}"
    )
    print(f"\n  {video.title}")
    print(details)

Extending the System

Adding Custom Metrics

@dataclass
class VideoAnalyticsExtended(VideoAnalytics):
    """VideoAnalytics with extra tracking fields for channel-specific needs."""

    # Extra per-video metadata. Every field has a default, so existing
    # VideoAnalytics keyword arguments keep working unchanged.
    sponsor_integrated: bool = False   # True if the video includes a sponsor segment
    collaboration_partner: str = ""    # Name of the collaborator, if any
    thumbnail_version: str = "A"       # Which thumbnail variant ran (A/B testing)

    @property
    def sponsor_engagement_lift(self) -> Optional[float]:
        """
        Placeholder for comparing this video's engagement to the channel
        average when tracking sponsored vs. non-sponsored performance.
        A negative lift would mean sponsored content underperforms, which
        is worth examining.

        Currently always returns None.
        """
        # Not implemented: needs the dashboard's average to compare against.
        return None

Importing from YouTube Studio Export

YouTube Studio allows exporting analytics to CSV. To use your exported data:

import csv

def load_from_youtube_export(
    csv_path: str,
    default_duration_minutes: float = 10.0,
) -> "list[VideoAnalytics]":
    """
    Load videos from YouTube Studio table export.
    Map column names from YouTube's export format to VideoAnalytics fields.

    YouTube Studio CSV columns (as of 2024):
    - 'Video title' → title
    - 'Views' → views
    - 'Watch time (hours)' → watch_time_minutes (multiply by 60)
    - 'Likes' → likes
    - 'Comments' → comments
    - 'Shares' → shares
    - 'Saves' (if available) → saves

    Missing columns and blank cells are read as 0 (or "Unknown" for the
    title). Thousands separators (",") are stripped from every number.
    Rows whose values cannot be parsed are skipped with a printed message.

    Args:
        csv_path: Path to the exported CSV file.
        default_duration_minutes: Duration assigned to every video, because
            the export has no duration column. Correct it per video
            afterwards for accurate completion rates.

    Returns:
        A list of VideoAnalytics objects, one per successfully parsed row.
    """

    def cell(row: dict, column: str) -> str:
        # row.get() yields None for an absent column or a short row; treat
        # that like a blank cell so .replace() is always called on a string.
        return (row.get(column) or "").replace(",", "").strip()

    videos = []
    # utf-8-sig also reads plain UTF-8 and drops a leading BOM that would
    # otherwise become part of the first column name; newline="" is what the
    # csv module expects for files it parses.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            try:
                video = VideoAnalytics(
                    title=row.get("Video title") or "Unknown",
                    views=int(cell(row, "Views") or 0),
                    watch_time_minutes=float(cell(row, "Watch time (hours)") or 0) * 60,
                    likes=int(cell(row, "Likes") or 0),
                    comments=int(cell(row, "Comments") or 0),
                    shares=int(cell(row, "Shares") or 0),
                    saves=int(cell(row, "Saves") or 0),
                    duration_minutes=default_duration_minutes,
                )
            except (ValueError, KeyError) as e:
                print(f"Skipping row due to error: {e}")
            else:
                videos.append(video)
    return videos

Growth Score Interpretation Reference

| Growth Score | What It Suggests | Common Causes |
|---|---|---|
| Below 3 | Low growth signal | Low shares, low engagement; content serves existing viewers but doesn't spread |
| 3–6 | Moderate growth signal | Decent engagement; some sharing; typical for established channels in stable niches |
| 6–10 | Strong growth signal | High share rate; high engagement; content reaching new audiences regularly |
| Above 10 | Very strong growth signal | Exceptional share rate; viral potential; new-audience discovery high |

Note: Interpret growth score relative to your own channel's baseline, not these absolute ranges. What matters is whether your score is improving over time and which videos score highest — those patterns are specific to your channel and audience.