Possession Metrics

Beginner 10 min read 0 views Nov 27, 2025
# Possession Metrics ## Overview Possession metrics quantify team control of the ball through possession percentage, pass completion rates, and ball retention statistics. These fundamental metrics provide insights into team playing style and dominance. ## Key Metrics ### Possession Percentage - **Formula**: (Team Passes / Total Passes) × 100 (the implementations below count passes, carries, and dribbles as possession events, not passes alone) - **Interpretation**: Higher values indicate greater ball control - **Typical Range**: 35-65% for most teams ### Pass Completion Rate - **Formula**: (Successful Passes / Total Pass Attempts) × 100 - **Benchmark**: Elite teams typically achieve 85-90% ### Ball Retention Index Combines possession duration with passing accuracy to measure effective ball control. In the implementations below, each team's events are split into sequences wherever the gap between consecutive events exceeds a time window (10 seconds by default), and the index is the average share of successful events per sequence, expressed as a percentage. ## Python Implementation ```python import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from scipy import stats class PossessionAnalyzer: """Analyze team possession metrics and ball retention.""" def __init__(self, match_data): """ Initialize analyzer with match event data. 
Parameters: ----------- match_data : pd.DataFrame Event data with columns: team, event_type, outcome, timestamp """ self.data = match_data self.possession_stats = None def calculate_possession_percentage(self): """Calculate possession percentage by team.""" # Count possession events (passes, carries, dribbles) possession_events = self.data[ self.data['event_type'].isin(['Pass', 'Carry', 'Dribble']) ] team_possessions = possession_events.groupby('team').size() total_possessions = team_possessions.sum() possession_pct = (team_possessions / total_possessions * 100).round(2) return possession_pct.to_dict() def calculate_pass_completion(self): """Calculate pass completion rates.""" passes = self.data[self.data['event_type'] == 'Pass'].copy() completion_stats = passes.groupby('team').agg({ 'outcome': [ ('total', 'count'), ('completed', lambda x: (x == 'Complete').sum()) ] }) completion_stats.columns = ['total_passes', 'completed_passes'] completion_stats['completion_rate'] = ( completion_stats['completed_passes'] / completion_stats['total_passes'] * 100 ).round(2) return completion_stats def calculate_ball_retention_index(self, window_seconds=10): """ Calculate ball retention index. 
Parameters: ----------- window_seconds : int Time window for measuring retention """ self.data = self.data.sort_values('timestamp') retention_scores = [] for team in self.data['team'].unique(): team_events = self.data[self.data['team'] == team].copy() # Calculate possession sequences team_events['time_diff'] = team_events['timestamp'].diff() team_events['new_sequence'] = team_events['time_diff'] > window_seconds team_events['sequence_id'] = team_events['new_sequence'].cumsum() # Analyze each sequence sequences = team_events.groupby('sequence_id').agg({ 'event_type': 'count', # Length of sequence 'outcome': lambda x: (x == 'Complete').sum() # Successful events }) sequences.columns = ['sequence_length', 'successful_events'] sequences['retention_score'] = ( sequences['successful_events'] / sequences['sequence_length'] ) avg_retention = sequences['retention_score'].mean() retention_scores.append({ 'team': team, 'ball_retention_index': round(avg_retention * 100, 2), 'avg_sequence_length': round(sequences['sequence_length'].mean(), 2) }) return pd.DataFrame(retention_scores) def calculate_possession_zones(self, positions): """ Calculate possession by field zone. 
Parameters: ----------- positions : pd.DataFrame Position data with columns: team, x, y (normalized 0-100) """ # Define zones positions['zone'] = pd.cut( positions['x'], bins=[0, 33, 66, 100], labels=['Defensive Third', 'Middle Third', 'Attacking Third'] ) zone_possession = positions.groupby(['team', 'zone']).size().unstack(fill_value=0) zone_possession_pct = zone_possession.div(zone_possession.sum(axis=1), axis=0) * 100 return zone_possession_pct.round(2) def generate_possession_report(self): """Generate comprehensive possession report.""" report = { 'possession_percentage': self.calculate_possession_percentage(), 'pass_completion': self.calculate_pass_completion(), 'retention': self.calculate_ball_retention_index() } self.possession_stats = report return report def visualize_possession_flow(self, team): """Visualize possession flow over time.""" team_data = self.data[self.data['team'] == team].copy() team_data = team_data.sort_values('timestamp') # Calculate rolling possession team_data['minute'] = (team_data['timestamp'] / 60).astype(int) possession_by_minute = team_data.groupby('minute').size() total_by_minute = self.data.groupby( (self.data['timestamp'] / 60).astype(int) ).size() possession_pct = (possession_by_minute / total_by_minute * 100).fillna(0) fig, ax = plt.subplots(figsize=(12, 6)) ax.plot(possession_pct.index, possession_pct.values, linewidth=2, color='#1f77b4') ax.fill_between(possession_pct.index, possession_pct.values, alpha=0.3, color='#1f77b4') ax.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% Line') ax.set_xlabel('Match Minute', fontsize=12, fontweight='bold') ax.set_ylabel('Possession %', fontsize=12, fontweight='bold') ax.set_title(f'Possession Flow - {team}', fontsize=14, fontweight='bold') ax.grid(True, alpha=0.3) ax.legend() plt.tight_layout() return fig # Example Usage if __name__ == "__main__": # Sample match event data np.random.seed(42) events = [] timestamp = 0 teams = ['Team A', 'Team B'] for _ in range(1000): team 
= np.random.choice(teams, p=[0.55, 0.45]) # Team A has more possession event_type = np.random.choice( ['Pass', 'Carry', 'Dribble', 'Shot', 'Tackle'], p=[0.6, 0.2, 0.1, 0.05, 0.05] ) # Pass completion probability if event_type == 'Pass': outcome = np.random.choice( ['Complete', 'Incomplete'], p=[0.85, 0.15] if team == 'Team A' else [0.78, 0.22] ) else: outcome = 'Complete' events.append({ 'team': team, 'event_type': event_type, 'outcome': outcome, 'timestamp': timestamp }) timestamp += np.random.uniform(1, 5) match_data = pd.DataFrame(events) # Analyze possession analyzer = PossessionAnalyzer(match_data) # Calculate metrics possession_pct = analyzer.calculate_possession_percentage() print("Possession Percentage:") for team, pct in possession_pct.items(): print(f" {team}: {pct}%") print("\nPass Completion:") pass_stats = analyzer.calculate_pass_completion() print(pass_stats) print("\nBall Retention Index:") retention = analyzer.calculate_ball_retention_index() print(retention) # Visualize fig = analyzer.visualize_possession_flow('Team A') plt.savefig('possession_flow.png', dpi=300, bbox_inches='tight') print("\nPossession flow chart saved as 'possession_flow.png'") ``` ## R Implementation ```r library(tidyverse) library(lubridate) library(ggplot2) # Possession Metrics Analysis in R PossessionAnalyzer <- R6::R6Class("PossessionAnalyzer", public = list( data = NULL, initialize = function(match_data) { self$data <- match_data }, calculate_possession_percentage = function() { # Filter possession events possession_events <- self$data %>% filter(event_type %in% c('Pass', 'Carry', 'Dribble')) # Calculate percentage by team possession_stats <- possession_events %>% group_by(team) %>% summarise(n_events = n()) %>% mutate( total_events = sum(n_events), possession_pct = round(n_events / total_events * 100, 2) ) return(possession_stats) }, calculate_pass_completion = function() { pass_stats <- self$data %>% filter(event_type == 'Pass') %>% group_by(team) %>% summarise( 
total_passes = n(), completed_passes = sum(outcome == 'Complete'), completion_rate = round(completed_passes / total_passes * 100, 2) ) return(pass_stats) }, calculate_ball_retention_index = function(window_seconds = 10) { retention_data <- self$data %>% arrange(timestamp) %>% group_by(team) %>% mutate( time_diff = timestamp - lag(timestamp, default = 0), new_sequence = time_diff > window_seconds, sequence_id = cumsum(new_sequence) ) %>% group_by(team, sequence_id) %>% summarise( sequence_length = n(), successful_events = sum(outcome == 'Complete'), .groups = 'drop' ) %>% mutate(retention_score = successful_events / sequence_length) # Calculate average retention by team retention_summary <- retention_data %>% group_by(team) %>% summarise( ball_retention_index = round(mean(retention_score) * 100, 2), avg_sequence_length = round(mean(sequence_length), 2) ) return(retention_summary) }, calculate_possession_zones = function(positions) { # Define field zones zone_possession <- positions %>% mutate( zone = cut(x, breaks = c(0, 33, 66, 100), labels = c('Defensive Third', 'Middle Third', 'Attacking Third') ) ) %>% group_by(team, zone) %>% summarise(n_events = n(), .groups = 'drop') %>% group_by(team) %>% mutate( total_events = sum(n_events), zone_pct = round(n_events / total_events * 100, 2) ) return(zone_possession) }, visualize_possession_flow = function(selected_team) { # Calculate possession by minute possession_flow <- self$data %>% mutate(minute = floor(timestamp / 60)) %>% group_by(minute, team) %>% summarise(n_events = n(), .groups = 'drop') %>% group_by(minute) %>% mutate( total_events = sum(n_events), possession_pct = n_events / total_events * 100 ) %>% filter(team == selected_team) # Create plot p <- ggplot(possession_flow, aes(x = minute, y = possession_pct)) + geom_line(color = '#1f77b4', size = 1.2) + geom_ribbon(aes(ymin = 0, ymax = possession_pct), fill = '#1f77b4', alpha = 0.3) + geom_hline(yintercept = 50, color = 'red', linetype = 'dashed', alpha = 0.5) + 
labs( title = paste('Possession Flow -', selected_team), x = 'Match Minute', y = 'Possession %' ) + theme_minimal() + theme( plot.title = element_text(face = 'bold', size = 14), axis.title = element_text(face = 'bold', size = 12) ) return(p) } ) ) # Example usage set.seed(42) # Generate sample match data n_events <- 1000 teams <- c('Team A', 'Team B') match_data <- tibble( team = sample(teams, n_events, replace = TRUE, prob = c(0.55, 0.45)), event_type = sample( c('Pass', 'Carry', 'Dribble', 'Shot', 'Tackle'), n_events, replace = TRUE, prob = c(0.6, 0.2, 0.1, 0.05, 0.05) ), timestamp = cumsum(runif(n_events, 1, 5)) ) %>% mutate( outcome = if_else( event_type == 'Pass', sample(c('Complete', 'Incomplete'), n(), replace = TRUE, prob = if_else(team == 'Team A', list(c(0.85, 0.15)), list(c(0.78, 0.22)))[[1]]), 'Complete' ) ) # Analyze possession metrics analyzer <- PossessionAnalyzer$new(match_data) cat("Possession Percentage:\n") print(analyzer$calculate_possession_percentage()) cat("\nPass Completion:\n") print(analyzer$calculate_pass_completion()) cat("\nBall Retention Index:\n") print(analyzer$calculate_ball_retention_index()) # Visualize p <- analyzer$visualize_possession_flow('Team A') ggsave('possession_flow_r.png', p, width = 12, height = 6, dpi = 300) cat("\nPossession flow chart saved\n") ``` ## Best Practices 1. **Context Matters**: Consider match context (score, tactics, opponent) 2. **Quality Over Quantity**: High possession without chances is ineffective 3. **Zone Analysis**: Possession location is as important as percentage 4. **Temporal Analysis**: Track possession trends throughout match 5. 
**Comparative Benchmarking**: Compare against team averages and opponents ## Practical Applications - **Match Analysis**: Evaluate team performance and tactical execution - **Opponent Scouting**: Identify possession-based playing styles - **Player Evaluation**: Assess individual contribution to ball retention - **Tactical Planning**: Design strategies based on possession profiles - **Performance Tracking**: Monitor possession trends across season

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.
Table of Contents
Quick Actions
Glossary