Case Study 2: Real-Time Media Sentiment Monitoring System
Overview
This case study builds a real-time system for monitoring media sentiment about college football teams and players, tracking perception changes and generating alerts.
Business Context
A college football program's communications department needs to:
- Monitor media coverage across multiple sources
- Track sentiment trends over the season
- Identify negative coverage quickly for response
- Measure the impact of events on team perception
- Compare media coverage to rival programs
Data Description
# Article schema: documents the shape of one raw ingested article.
# Each value is a human-readable description of the field's content.
article_schema = {
    'article_id': 'unique identifier',
    'source': 'publication name',
    'author': 'writer name',
    'title': 'article headline',
    'content': 'full article text',
    'published_date': 'publication timestamp',
    'url': 'article URL',
    'category': 'article type (news, opinion, analysis)'
}

# Entity mentions: one record per team/player/coach reference found in an article.
mention_schema = {
    'article_id': 'reference to article',
    'entity_type': 'team/player/coach',
    'entity_name': 'normalized name',
    'sentiment': 'sentiment score',
    'context': 'surrounding text'
}

# Data sources: feeds monitored by the pipeline, grouped by reach,
# plus the expected in-season ingestion volume.
sources = {
    'national': ['ESPN', 'CBS Sports', 'Yahoo Sports', 'The Athletic'],
    'local': ['Columbus Dispatch', 'Eleven Warriors', 'Bucknuts'],
    'social': ['Twitter aggregation', 'Reddit summaries'],
    'volume': '500-1000 articles/day during season'
}
Implementation
Step 1: Article Processing Pipeline
import re
import pandas as pd
import numpy as np
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta
from collections import defaultdict
from dataclasses import dataclass
@dataclass
class ProcessedArticle:
    """Processed article with extracted information.

    One instance per ingested article; produced by
    ``ArticleProcessor.process_article`` from a raw article dict.
    """
    article_id: str            # unique identifier carried over from the feed
    source: str                # publication name (e.g. 'ESPN')
    published_date: datetime   # publication timestamp from the feed
    title_sentiment: float     # lexicon sentiment score for the headline (0..1, 0.5 neutral)
    content_sentiment: float   # lexicon sentiment score for the body text (0..1, 0.5 neutral)
    entities: List[Dict]       # team/coach/player mentions with surrounding context
    topics: List[str]          # topic labels, e.g. 'game_result'; ['general'] if none matched
    key_phrases: List[str]     # top TF-IDF 2-3 word phrases from the body
class ArticleProcessor:
    """Process incoming articles for sentiment analysis.

    Pipeline per article: entity extraction (headline + body), separate
    sentiment scoring for title and content, topic classification, and
    key-phrase extraction.
    """

    def __init__(self):
        # Collaborators are defined elsewhere in this module.
        self.entity_recognizer = FootballEntityRecognizer()
        self.sentiment_analyzer = FootballSentimentAnalyzer()
        self.topic_classifier = TopicClassifier()

    def process_article(self, article: Dict) -> ProcessedArticle:
        """Process a single raw article dict into a ProcessedArticle.

        Args:
            article: Raw article following ``article_schema``; must contain
                'article_id', 'source', 'published_date', 'title', 'content'.

        Returns:
            ProcessedArticle with sentiment scores, entities, topics and
            key phrases filled in.
        """
        # Entities can appear in either the headline or the body, so scan both.
        entities = self.entity_recognizer.extract(
            article['title'] + ' ' + article['content']
        )

        # Headlines are often more charged than body text; score them separately.
        title_sentiment = self.sentiment_analyzer.analyze(article['title'])
        content_sentiment = self.sentiment_analyzer.analyze(article['content'])

        topics = self.topic_classifier.classify(article['content'])
        key_phrases = self._extract_key_phrases(article['content'])

        return ProcessedArticle(
            article_id=article['article_id'],
            source=article['source'],
            published_date=article['published_date'],
            title_sentiment=title_sentiment,
            content_sentiment=content_sentiment,
            entities=entities,
            topics=topics,
            key_phrases=key_phrases,
        )

    def _extract_key_phrases(self, text: str, n: int = 10) -> List[str]:
        """Extract up to ``n`` 2-3 word key phrases via TF-IDF vocabulary.

        Returns an empty list when the text yields no vocabulary
        (empty or stop-word-only input).
        """
        # Local import keeps sklearn optional for callers that never hit this path.
        from sklearn.feature_extraction.text import TfidfVectorizer

        vectorizer = TfidfVectorizer(ngram_range=(2, 3), max_features=n)
        try:
            vectorizer.fit_transform([text])
            return list(vectorizer.get_feature_names_out())
        except ValueError:
            # Fix: was a bare ``except``, which also swallowed SystemExit and
            # KeyboardInterrupt. TfidfVectorizer raises ValueError when the
            # fitted vocabulary is empty — that is the only case we expect.
            return []
class FootballEntityRecognizer:
    """Recognize football entities (teams, coaches, players) in free text."""

    def __init__(self):
        self.team_aliases = self._load_team_aliases()
        self.coach_names = self._load_coach_names()

    def _load_team_aliases(self) -> Dict[str, str]:
        """Map lowercase team-name variations to canonical team names."""
        return {
            'ohio state': 'Ohio State',
            'osu': 'Ohio State',
            'buckeyes': 'Ohio State',
            'the ohio state': 'Ohio State',
            'michigan': 'Michigan',
            'wolverines': 'Michigan',
            'um': 'Michigan',
            'alabama': 'Alabama',
            'crimson tide': 'Alabama',
            'bama': 'Alabama',
            'georgia': 'Georgia',
            'bulldogs': 'Georgia',
            'dawgs': 'Georgia',
            'uga': 'Georgia'
        }

    def _load_coach_names(self) -> Dict[str, Dict]:
        """Map lowercase coach names to their team/position info."""
        return {
            'ryan day': {'team': 'Ohio State', 'position': 'Head Coach'},
            'kirby smart': {'team': 'Georgia', 'position': 'Head Coach'},
            'nick saban': {'team': 'Alabama', 'position': 'Head Coach (Former)'}
        }

    @staticmethod
    def _find_whole(phrase: str, text_lower: str) -> int:
        """Return the start index of ``phrase`` as a whole word/phrase, or -1.

        Fix: the original used plain substring tests, so short aliases fired
        inside unrelated words ('um' in 'stadium', 'bama' in 'Alabamians').
        Word boundaries restrict matches to standalone tokens.
        """
        match = re.search(r'\b' + re.escape(phrase) + r'\b', text_lower)
        return match.start() if match else -1

    def extract(self, text: str) -> List[Dict]:
        """Extract all team, coach, and player entities from ``text``.

        Returns a list of dicts; teams/coaches are matched against the
        alias tables, players heuristically as capitalized word pairs.
        Each entity carries a short surrounding-text context window.
        """
        entities = []
        text_lower = text.lower()

        # Teams: whole-word alias match against the canonical-name table.
        for alias, canonical in self.team_aliases.items():
            idx = self._find_whole(alias, text_lower)
            if idx >= 0:
                # 50 chars either side gives a readable context snippet.
                context = text[max(0, idx - 50):idx + len(alias) + 50]
                entities.append({
                    'type': 'team',
                    'name': canonical,
                    'alias_found': alias,
                    'context': context
                })

        # Coaches: same whole-word matching, enriched with team info.
        for coach, info in self.coach_names.items():
            idx = self._find_whole(coach, text_lower)
            if idx >= 0:
                context = text[max(0, idx - 50):idx + len(coach) + 50]
                entities.append({
                    'type': 'coach',
                    'name': coach.title(),
                    'team': info['team'],
                    'context': context
                })

        # Players: heuristic — two adjacent capitalized words that are not
        # already known as a team alias or a coach name.
        player_pattern = re.compile(r'\b([A-Z][a-z]+\s+[A-Z][a-z]+)\b')
        for match in player_pattern.finditer(text):
            name = match.group(1)
            if name.lower() not in self.team_aliases and name.lower() not in self.coach_names:
                entities.append({
                    'type': 'player',
                    'name': name,
                    'context': text[max(0, match.start() - 30):match.end() + 30]
                })

        return entities
class FootballSentimentAnalyzer:
    """Lexicon-based sentiment analysis tuned for football coverage.

    Scores lie in [0, 1]: 1.0 is all-positive, 0.0 all-negative, and 0.5
    is returned when no lexicon word is present (neutral).
    """

    POSITIVE_WORDS = {
        'win', 'victory', 'dominant', 'impressive', 'excellent',
        'outstanding', 'elite', 'championship', 'success', 'breakthrough',
        'clutch', 'stellar', 'explosive', 'historic'  # ('dominant' was listed twice)
    }

    NEGATIVE_WORDS = {
        'loss', 'defeat', 'struggle', 'disappointing', 'concern',
        'injury', 'suspend', 'fire', 'investigation', 'scandal',
        'blowout', 'collapse', 'embarrass', 'turnover', 'mistake'
    }

    # Multipliers applied to the sentiment word that immediately follows.
    INTENSIFIERS = {
        'very': 1.5, 'extremely': 2.0, 'incredibly': 2.0,
        'somewhat': 0.7, 'slightly': 0.5, 'totally': 1.8
    }

    # Fix: tokenize with a regex instead of str.split() so trailing
    # punctuation no longer hides lexicon words ('win!' -> 'win').
    _WORD_RE = re.compile(r"[a-z']+")

    def analyze(self, text: str) -> float:
        """Return the sentiment score of ``text`` in [0, 1].

        An intensifier scales only the sentiment word that directly
        follows it; any other word resets the multiplier to 1.0.
        """
        words = self._WORD_RE.findall(text.lower())

        positive_score = 0
        negative_score = 0
        current_intensifier = 1.0

        for word in words:
            if word in self.INTENSIFIERS:
                # Remember the multiplier for the next word only.
                current_intensifier = self.INTENSIFIERS[word]
                continue

            if word in self.POSITIVE_WORDS:
                positive_score += current_intensifier
            elif word in self.NEGATIVE_WORDS:
                negative_score += current_intensifier

            current_intensifier = 1.0

        total = positive_score + negative_score
        if total == 0:
            return 0.5  # Neutral: no lexicon word found.
        return positive_score / total
class TopicClassifier:
    """Keyword-vote topic classification for article text."""

    # Substring matches double as crude stemming ('recruit' hits 'recruiting').
    TOPIC_KEYWORDS = {
        'game_result': ['win', 'loss', 'beat', 'score', 'final', 'victory', 'defeat'],
        'recruiting': ['recruit', 'commit', 'visit', 'offer', 'star', 'class'],
        'transfer': ['transfer', 'portal', 'enter', 'leave', 'destination'],
        'injury': ['injury', 'injured', 'out', 'questionable', 'surgery'],
        'coaching': ['coach', 'hire', 'fire', 'staff', 'coordinator'],
        'preview': ['preview', 'prediction', 'matchup', 'expect', 'ahead'],
        'analysis': ['analysis', 'breakdown', 'film', 'scheme', 'strategy']
    }

    # A topic needs at least this many keyword hits before it is assigned.
    _MIN_HITS = 2

    def classify(self, text: str) -> List[str]:
        """Return every topic whose keyword count meets the threshold.

        Falls back to ['general'] when no topic qualifies.
        """
        lowered = text.lower()
        matched = [
            topic
            for topic, keywords in self.TOPIC_KEYWORDS.items()
            if sum(kw in lowered for kw in keywords) >= self._MIN_HITS
        ]
        return matched or ['general']
Step 2: Sentiment Tracking System
class SentimentTracker:
    """Track per-entity sentiment mentions and daily aggregates over time."""

    def __init__(self):
        # entity -> list of raw mention dicts (sentiment/timestamp/source/article_id)
        self.history = defaultdict(list)
        # entity -> {'YYYY-MM-DD' -> {'sentiments': [...], 'sources': set()}}
        self.daily_aggregates = defaultdict(dict)

    def record_mention(self,
                       entity: str,
                       sentiment: float,
                       timestamp: datetime,
                       source: str,
                       article_id: str):
        """Record one sentiment mention and fold it into the daily aggregate."""
        self.history[entity].append({
            'sentiment': sentiment,
            'timestamp': timestamp,
            'source': source,
            'article_id': article_id
        })

        # Daily bucket keyed by ISO date string.
        date_key = timestamp.strftime('%Y-%m-%d')
        day = self.daily_aggregates[entity].setdefault(
            date_key, {'sentiments': [], 'sources': set()}
        )
        day['sentiments'].append(sentiment)
        day['sources'].add(source)

    def get_trend(self,
                  entity: str,
                  days: int = 30) -> pd.DataFrame:
        """Return daily sentiment aggregates for the last ``days`` days.

        Returns an empty DataFrame for unknown entities. Rows are sorted
        by date; ISO date strings compare correctly lexicographically.
        """
        if entity not in self.daily_aggregates:
            return pd.DataFrame()

        cutoff_str = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
        data = [
            {
                'date': date,
                'avg_sentiment': np.mean(info['sentiments']),
                'mention_count': len(info['sentiments']),
                'source_count': len(info['sources']),
                'sentiment_std': np.std(info['sentiments'])
            }
            for date, info in self.daily_aggregates[entity].items()
            if date >= cutoff_str
        ]

        df = pd.DataFrame(data)
        if not df.empty:
            df = df.sort_values('date')
        return df

    def calculate_momentum(self,
                           entity: str,
                           window: int = 7) -> Dict:
        """Compare the last ``window`` days to the preceding ``window`` days.

        Returns momentum (recent avg minus previous avg) and a coarse
        direction label ('improving'/'declining'/'stable', +-0.1 band).
        """
        trend = self.get_trend(entity, days=window * 2)
        if len(trend) < window:
            return {'momentum': 0, 'direction': 'stable'}

        # NOTE(review): when window <= len(trend) < 2*window, head and tail
        # overlap; acceptable for a coarse signal but worth confirming.
        recent = trend.tail(window)['avg_sentiment'].mean()
        previous = trend.head(window)['avg_sentiment'].mean()
        momentum = recent - previous

        if momentum > 0.1:
            direction = 'improving'
        elif momentum < -0.1:
            direction = 'declining'
        else:
            direction = 'stable'

        return {
            'momentum': momentum,
            'direction': direction,
            'recent_avg': recent,
            'previous_avg': previous
        }

    def detect_shifts(self,
                      entity: str,
                      threshold: float = 0.15) -> List[Dict]:
        """Detect rolling-window sentiment shifts of at least ``threshold``.

        Compares each 5-mention window against the window before it;
        returns one record per detected shift with the article that
        closed the shifted window. Requires at least 10 mentions.
        """
        history = self.history[entity]
        if len(history) < 10:
            return []

        sorted_history = sorted(history, key=lambda x: x['timestamp'])
        shifts = []
        window = 5

        for i in range(window, len(sorted_history)):
            prev_slice = sorted_history[max(0, i - 2 * window):i - window]
            if not prev_slice:
                # Fix: at i == window the previous slice is empty and the
                # original took np.mean([]) -> NaN (with a RuntimeWarning).
                continue
            recent = np.mean([h['sentiment'] for h in sorted_history[i - window:i]])
            previous = np.mean([h['sentiment'] for h in prev_slice])
            change = recent - previous

            if abs(change) >= threshold:
                shifts.append({
                    'timestamp': sorted_history[i - 1]['timestamp'],
                    'change': change,
                    'direction': 'positive' if change > 0 else 'negative',
                    'trigger_article': sorted_history[i - 1]['article_id']
                })

        return shifts
class AlertSystem:
    """Generate alerts based on sentiment changes."""

    def __init__(self, tracker: SentimentTracker):
        self.tracker = tracker
        self.alert_rules = []
        self.sent_alerts = []

    def add_rule(self,
                 entity: str,
                 condition: str,
                 threshold: float,
                 action: str):
        """Register an alerting rule for later evaluation."""
        self.alert_rules.append({
            'entity': entity,
            'condition': condition,
            'threshold': threshold,
            'action': action
        })

    def check_alerts(self) -> List[Dict]:
        """Evaluate every registered rule and return those that fire.

        Dispatches each rule's condition to its checker; unknown
        conditions are ignored, matching the original elif chain.
        """
        checkers = {
            'sentiment_drop': self._check_sentiment_drop,
            'high_volume': self._check_high_volume,
            'negative_surge': self._check_negative_surge,
        }
        triggered = []
        for rule in self.alert_rules:
            checker = checkers.get(rule['condition'])
            if checker is None:
                continue
            hit = checker(rule['entity'], rule['threshold'])
            if hit is not None:
                value, message = hit
                triggered.append({
                    'rule': rule,
                    'value': value,
                    'timestamp': datetime.now(),
                    'message': message
                })
        return triggered

    def _check_sentiment_drop(self, entity, threshold):
        """Fire when momentum falls below -threshold; (value, message) or None."""
        momentum = self.tracker.calculate_momentum(entity)
        if momentum['momentum'] < -threshold:
            return (momentum['momentum'],
                    f"{entity} sentiment dropped by {abs(momentum['momentum']):.2f}")
        return None

    def _check_high_volume(self, entity, threshold):
        """Fire when today's mention count exceeds threshold."""
        trend = self.tracker.get_trend(entity, days=1)
        if not trend.empty:
            volume = trend['mention_count'].sum()
            if volume > threshold:
                return (volume,
                        f"{entity} has {volume} mentions today (threshold: {threshold})")
        return None

    def _check_negative_surge(self, entity, threshold):
        """Fire when today's average sentiment drops below threshold."""
        trend = self.tracker.get_trend(entity, days=1)
        if not trend.empty:
            avg_sent = trend['avg_sentiment'].mean()
            if avg_sent < threshold:
                return (avg_sent,
                        f"{entity} daily sentiment at {avg_sent:.2f} (threshold: {threshold})")
        return None

    def format_alert(self, alert: Dict) -> str:
        """Render a triggered alert as a plain-text notification."""
        rule = alert['rule']
        return f"""
SENTIMENT ALERT
===============
Entity: {rule['entity']}
Condition: {rule['condition']}
Current Value: {alert['value']:.2f}
Threshold: {rule['threshold']}
Time: {alert['timestamp'].strftime('%Y-%m-%d %H:%M')}
Message: {alert['message']}
"""
Step 3: Comparative Analysis
class ComparativeAnalyzer:
    """Compare sentiment across entities using a shared SentimentTracker."""

    def __init__(self, tracker: SentimentTracker):
        self.tracker = tracker

    def compare_teams(self,
                      teams: List[str],
                      days: int = 30) -> pd.DataFrame:
        """Summarize each team's sentiment over the last ``days`` days.

        Teams with no recorded trend are skipped. Returns a DataFrame
        sorted by average sentiment (best first); empty if no team had data.
        """
        comparisons = []
        for team in teams:
            trend = self.tracker.get_trend(team, days)
            if trend.empty:
                continue
            comparisons.append({
                'team': team,
                'avg_sentiment': trend['avg_sentiment'].mean(),
                'sentiment_trend': self._calculate_trend_direction(trend),
                'total_mentions': trend['mention_count'].sum(),
                'source_diversity': trend['source_count'].mean(),
                'volatility': trend['sentiment_std'].mean()
            })
        if not comparisons:
            # Fix: sort_values('avg_sentiment') on a column-less empty frame
            # raises KeyError; return a plain empty DataFrame instead.
            return pd.DataFrame()
        return pd.DataFrame(comparisons).sort_values('avg_sentiment', ascending=False)

    def _calculate_trend_direction(self, trend: pd.DataFrame) -> str:
        """Label the trend 'improving'/'declining'/'stable' (+-0.05 band)."""
        if len(trend) < 2:
            return 'stable'
        first_half = trend.head(len(trend) // 2)['avg_sentiment'].mean()
        second_half = trend.tail(len(trend) // 2)['avg_sentiment'].mean()
        change = second_half - first_half

        if change > 0.05:
            return 'improving'
        elif change < -0.05:
            return 'declining'
        else:
            return 'stable'

    def find_correlation_events(self,
                                team: str,
                                event_dates: List[Tuple[datetime, str]]) -> Dict:
        """Correlate sentiment changes with known events.

        For each (date, name) event, compares mean sentiment in the 3 days
        before the event to the 3 days from the event onward; a change of
        more than 0.1 in absolute value is flagged as significant.
        """
        correlations = []
        for event_date, event_name in event_dates:
            pre_start = event_date - timedelta(days=3)
            post_end = event_date + timedelta(days=3)

            history = self.tracker.history[team]
            pre_sentiment = [h['sentiment'] for h in history
                             if pre_start <= h['timestamp'] < event_date]
            post_sentiment = [h['sentiment'] for h in history
                              if event_date <= h['timestamp'] <= post_end]

            # Only score events with mentions on both sides.
            if pre_sentiment and post_sentiment:
                pre_avg = np.mean(pre_sentiment)
                post_avg = np.mean(post_sentiment)
                change = post_avg - pre_avg
                correlations.append({
                    'event': event_name,
                    'date': event_date,
                    'pre_sentiment': pre_avg,
                    'post_sentiment': post_avg,
                    'change': change,
                    'significant': abs(change) > 0.1
                })

        return {
            'team': team,
            'events_analyzed': len(correlations),
            'correlations': correlations
        }

    def generate_report(self, teams: List[str], days: int = 30) -> str:
        """Generate a plain-text comparative analysis report."""
        comparison = self.compare_teams(teams, days)

        report = f"""
MEDIA SENTIMENT COMPARISON REPORT
=================================
Period: Last {days} days
Teams Analyzed: {len(teams)}
RANKINGS BY AVERAGE SENTIMENT:
"""
        if comparison.empty:
            # Fix: the original crashed on comparison.iloc[0] with no data.
            return report + "\n(no sentiment data recorded for these teams)\n"

        # Fix: the original numbered rows with the DataFrame index (i + 1),
        # but sort_values keeps the pre-sort index, so ranks were scrambled.
        for rank, (_, row) in enumerate(comparison.iterrows(), start=1):
            report += f"""
{rank}. {row['team']}
- Average Sentiment: {row['avg_sentiment']:.3f}
- Trend: {row['sentiment_trend']}
- Total Mentions: {int(row['total_mentions'])}
- Volatility: {row['volatility']:.3f}
"""
        best = comparison.iloc[0]
        worst = comparison.iloc[-1]
        report += f"""
KEY INSIGHTS:
- Highest sentiment: {best['team']} ({best['avg_sentiment']:.3f})
- Lowest sentiment: {worst['team']} ({worst['avg_sentiment']:.3f})
- Most volatile: {comparison.loc[comparison['volatility'].idxmax(), 'team']}
- Most coverage: {comparison.loc[comparison['total_mentions'].idxmax(), 'team']}
"""
        return report
Results
Sentiment Tracking Dashboard
OHIO STATE MEDIA SENTIMENT DASHBOARD
====================================
Period: November 2024
Weekly Sentiment Trend:
Week 1: 0.72 ████████████████████░░░░ (245 mentions)
Week 2: 0.68 ██████████████████░░░░░░ (312 mentions)
Week 3: 0.75 ████████████████████░░░░ (198 mentions)
Week 4: 0.81 ██████████████████████░░ (456 mentions) ← The Game
Momentum: +0.09 (Improving)
Top Stories by Sentiment Impact:
1. "Ohio State dominates Michigan" (+0.23)
2. "Ryan Day addresses rivalry criticism" (+0.08)
3. "Injury concerns for CFP" (-0.12)
Source Distribution:
- National: 58%
- Local: 32%
- Social: 10%
Alert Summary
ALERTS TRIGGERED (LAST 7 DAYS)
==============================
[HIGH] Negative Sentiment Surge
Entity: Alabama
Value: 0.38 (threshold: 0.45)
Time: 2024-11-16 14:30
Cause: Post-loss coverage after upset
[MEDIUM] High Volume Alert
Entity: Ohio State
Value: 456 mentions (threshold: 300)
Time: 2024-11-30 18:00
Cause: Rivalry week coverage spike
[LOW] Trend Change Detected
Entity: Michigan
Value: -0.15 momentum
Time: 2024-11-25 09:00
Cause: Pre-game narrative shift
Comparative Analysis
CONFERENCE SENTIMENT COMPARISON
===============================
Big Ten East:
Team | Sentiment | Trend | Mentions
-------------|-----------|----------|--------
Ohio State | 0.74 | Stable | 1,234
Michigan | 0.68 | Declining| 1,189
Penn State | 0.71 | Improving| 876
Maryland | 0.62 | Stable | 345
SEC West:
Team | Sentiment | Trend | Mentions
-------------|-----------|----------|--------
Georgia | 0.78 | Stable | 1,456
Alabama | 0.58 | Declining| 1,389
Texas | 0.69 | Improving| 987
LSU | 0.64 | Stable | 723
Lessons Learned
- Source Weighting: National media sentiment differs from local coverage; weight the two accordingly.
- Event Correlation: Major events cause 24-48 hour sentiment spikes that then normalize.
- Rivalry Effects: Sentiment is relative; coverage of opponents affects perception of the team.
- Alert Tuning: Initial thresholds needed adjustment to reduce false positives.
- Context Matters: The same words carry different sentiment in different contexts.
Recommendations
- Real-Time Dashboard: Provide live sentiment tracking for communications team
- Response Protocols: Define response procedures for negative alerts
- Baseline Calibration: Establish normal sentiment ranges per team
- Source Expansion: Add podcast transcripts and video content analysis
- Predictive Alerts: Build models to predict sentiment changes before they occur