Possession Metrics

Beginner 10 min read 20 views Nov 27, 2025

# Possession Metrics

## Overview

Possession metrics quantify team control of the ball through possession percentage, pass completion rates, and ball retention statistics. These fundamental metrics provide insights into team playing style and dominance.

## Key Metrics

### Possession Percentage

- **Formula**: (Team Passes / Total Passes) × 100
- **Interpretation**: Higher values indicate greater ball control
- **Typical Range**: 35-65% for most teams
- **Note**: The implementations below use an event-based variant of this formula that counts passes, carries, and dribbles rather than passes alone.

### Pass Completion Rate

- **Formula**: (Successful Passes / Total Pass Attempts) × 100
- **Benchmark**: Elite teams typically achieve 85-90%

### Ball Retention Index

Combines possession duration with passing accuracy to measure effective ball control. In the implementations below it is the average share of successful events per possession sequence, where a new sequence starts whenever a team's consecutive events are more than `window_seconds` apart.

## Python Implementation

```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


class PossessionAnalyzer:
    """Analyze team possession metrics and ball retention."""

    # Event types that count as a team being in possession of the ball.
    # Shared by every possession calculation so they stay consistent.
    POSSESSION_EVENTS = ('Pass', 'Carry', 'Dribble')

    def __init__(self, match_data):
        """
        Initialize analyzer with match event data.

        Parameters:
        -----------
        match_data : pd.DataFrame
            Event data with columns: team, event_type, outcome, timestamp
            (timestamp in seconds)
        """
        self.data = match_data
        self.possession_stats = None

    def calculate_possession_percentage(self):
        """
        Calculate possession percentage by team.

        Returns:
        --------
        dict
            Maps each team to its share (0-100, two decimals) of all
            possession events (passes, carries, dribbles).
        """
        possession_events = self.data[
            self.data['event_type'].isin(self.POSSESSION_EVENTS)
        ]

        team_possessions = possession_events.groupby('team').size()
        total_possessions = team_possessions.sum()

        possession_pct = (team_possessions / total_possessions * 100).round(2)
        return possession_pct.to_dict()

    def calculate_pass_completion(self):
        """
        Calculate pass completion rates.

        Returns:
        --------
        pd.DataFrame
            Indexed by team with columns: total_passes, completed_passes,
            completion_rate (percentage, two decimals).
        """
        passes = self.data[self.data['event_type'] == 'Pass']

        completion_stats = passes.groupby('team')['outcome'].agg(
            total_passes='count',
            completed_passes=lambda x: (x == 'Complete').sum(),
        )
        completion_stats['completion_rate'] = (
            completion_stats['completed_passes']
            / completion_stats['total_passes'] * 100
        ).round(2)

        return completion_stats

    def calculate_ball_retention_index(self, window_seconds=10):
        """
        Calculate ball retention index.

        Parameters:
        -----------
        window_seconds : int
            Time window for measuring retention. A gap longer than this
            between two consecutive events of the same team starts a new
            possession sequence.

        Returns:
        --------
        pd.DataFrame
            One row per team with columns: team, ball_retention_index
            (mean share of successful events per sequence, as a
            percentage) and avg_sequence_length.
        """
        # Sort into a local so the analyzer's own data is left untouched.
        ordered = self.data.sort_values('timestamp')

        retention_scores = []

        for team in ordered['team'].unique():
            team_events = ordered[ordered['team'] == team].copy()

            # The first diff is NaN, which compares False, so the first
            # event always belongs to sequence 0.
            team_events['time_diff'] = team_events['timestamp'].diff()
            team_events['new_sequence'] = team_events['time_diff'] > window_seconds
            team_events['sequence_id'] = team_events['new_sequence'].cumsum()

            sequences = team_events.groupby('sequence_id').agg(
                sequence_length=('event_type', 'count'),
                successful_events=('outcome', lambda x: (x == 'Complete').sum()),
            )
            sequences['retention_score'] = (
                sequences['successful_events'] / sequences['sequence_length']
            )

            avg_retention = sequences['retention_score'].mean()

            retention_scores.append({
                'team': team,
                'ball_retention_index': round(avg_retention * 100, 2),
                'avg_sequence_length': round(sequences['sequence_length'].mean(), 2)
            })

        return pd.DataFrame(retention_scores)

    def calculate_possession_zones(self, positions):
        """
        Calculate possession by field zone.

        Parameters:
        -----------
        positions : pd.DataFrame
            Position data with columns: team, x, y (normalized 0-100).
            The frame is not modified.

        Returns:
        --------
        pd.DataFrame
            Indexed by team, one column per third of the pitch, holding
            the percentage of that team's positions in each zone.
        """
        # include_lowest=True keeps x == 0 in the defensive third; the
        # default left-open first bin would silently drop it as NaN.
        zones = pd.cut(
            positions['x'],
            bins=[0, 33, 66, 100],
            labels=['Defensive Third', 'Middle Third', 'Attacking Third'],
            include_lowest=True,
        )

        # assign() works on a copy, so the caller's frame gains no column.
        # observed=False keeps all three zone columns even when one is empty.
        zone_possession = (
            positions.assign(zone=zones)
            .groupby(['team', 'zone'], observed=False)
            .size()
            .unstack(fill_value=0)
        )
        zone_possession_pct = zone_possession.div(
            zone_possession.sum(axis=1), axis=0
        ) * 100

        return zone_possession_pct.round(2)

    def generate_possession_report(self):
        """
        Generate comprehensive possession report.

        Returns:
        --------
        dict
            Keys: possession_percentage, pass_completion, retention.
            The report is also stored on self.possession_stats.
        """
        report = {
            'possession_percentage': self.calculate_possession_percentage(),
            'pass_completion': self.calculate_pass_completion(),
            'retention': self.calculate_ball_retention_index()
        }

        self.possession_stats = report
        return report

    def visualize_possession_flow(self, team):
        """
        Visualize possession flow over time.

        Parameters:
        -----------
        team : str
            Team whose per-minute possession share is plotted.

        Returns:
        --------
        matplotlib.figure.Figure
            Line chart of the team's share of possession events per minute.
        """
        # Use the same definition of possession as
        # calculate_possession_percentage: shots and tackles are not
        # possession events and would distort the per-minute share.
        events = self.data[
            self.data['event_type'].isin(self.POSSESSION_EVENTS)
        ].copy()
        events['minute'] = (events['timestamp'] / 60).astype(int)

        total_by_minute = events.groupby('minute').size()
        team_by_minute = events[events['team'] == team].groupby('minute').size()

        # Minutes in which the team has no possession events align to NaN
        # and are reported as 0%.
        possession_pct = (team_by_minute / total_by_minute * 100).fillna(0)

        fig, ax = plt.subplots(figsize=(12, 6))

        ax.plot(possession_pct.index, possession_pct.values,
                linewidth=2, color='#1f77b4')
        ax.fill_between(possession_pct.index, possession_pct.values,
                        alpha=0.3, color='#1f77b4')
        ax.axhline(y=50, color='red', linestyle='--', alpha=0.5, label='50% Line')

        ax.set_xlabel('Match Minute', fontsize=12, fontweight='bold')
        ax.set_ylabel('Possession %', fontsize=12, fontweight='bold')
        ax.set_title(f'Possession Flow - {team}', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        ax.legend()

        plt.tight_layout()
        return fig


# Example Usage
if __name__ == "__main__":
    # Sample match event data
    np.random.seed(42)

    events = []
    timestamp = 0
    teams = ['Team A', 'Team B']

    for _ in range(1000):
        team = np.random.choice(teams, p=[0.55, 0.45])  # Team A has more possession
        event_type = np.random.choice(
            ['Pass', 'Carry', 'Dribble', 'Shot', 'Tackle'],
            p=[0.6, 0.2, 0.1, 0.05, 0.05]
        )

        # Pass completion probability
        if event_type == 'Pass':
            outcome = np.random.choice(
                ['Complete', 'Incomplete'],
                p=[0.85, 0.15] if team == 'Team A' else [0.78, 0.22]
            )
        else:
            outcome = 'Complete'

        events.append({
            'team': team,
            'event_type': event_type,
            'outcome': outcome,
            'timestamp': timestamp
        })

        timestamp += np.random.uniform(1, 5)

    match_data = pd.DataFrame(events)

    # Analyze possession
    analyzer = PossessionAnalyzer(match_data)

    # Calculate metrics
    possession_pct = analyzer.calculate_possession_percentage()
    print("Possession Percentage:")
    for team, pct in possession_pct.items():
        print(f"  {team}: {pct}%")

    print("\nPass Completion:")
    pass_stats = analyzer.calculate_pass_completion()
    print(pass_stats)

    print("\nBall Retention Index:")
    retention = analyzer.calculate_ball_retention_index()
    print(retention)

    # Visualize
    fig = analyzer.visualize_possession_flow('Team A')
    # Save through the returned figure rather than the implicit current one
    fig.savefig('possession_flow.png', dpi=300, bbox_inches='tight')
    print("\nPossession flow chart saved as 'possession_flow.png'")
```

## R Implementation

```r
library(tidyverse)
library(lubridate)
library(ggplot2)

# Possession Metrics Analysis in R
PossessionAnalyzer <- R6::R6Class("PossessionAnalyzer",
  public = list(
    data = NULL,

    # Event types that count as a team being in possession of the ball;
    # shared by every possession calculation so they stay consistent
    possession_event_types = c('Pass', 'Carry', 'Dribble'),

    initialize = function(match_data) {
      self$data <- match_data
    },

    calculate_possession_percentage = function() {
      # Filter possession events
      possession_events <- self$data %>%
        filter(event_type %in% self$possession_event_types)

      # Calculate percentage by team
      possession_stats <- possession_events %>%
        group_by(team) %>%
        summarise(n_events = n()) %>%
        mutate(
          total_events = sum(n_events),
          possession_pct = round(n_events / total_events * 100, 2)
        )

      return(possession_stats)
    },

    calculate_pass_completion = function() {
      pass_stats <- self$data %>%
        filter(event_type == 'Pass') %>%
        group_by(team) %>%
        summarise(
          total_passes = n(),
          completed_passes = sum(outcome == 'Complete'),
          completion_rate = round(completed_passes / total_passes * 100, 2)
        )

      return(pass_stats)
    },

    calculate_ball_retention_index = function(window_seconds = 10) {
      retention_data <- self$data %>%
        arrange(timestamp) %>%
        group_by(team) %>%
        mutate(
          # Default to the team's first timestamp so its first event has a
          # zero gap and never opens a spurious extra sequence
          time_diff = timestamp - lag(timestamp, default = first(timestamp)),
          new_sequence = time_diff > window_seconds,
          sequence_id = cumsum(new_sequence)
        ) %>%
        group_by(team, sequence_id) %>%
        summarise(
          sequence_length = n(),
          successful_events = sum(outcome == 'Complete'),
          .groups = 'drop'
        ) %>%
        mutate(retention_score = successful_events / sequence_length)

      # Calculate average retention by team
      retention_summary <- retention_data %>%
        group_by(team) %>%
        summarise(
          ball_retention_index = round(mean(retention_score) * 100, 2),
          avg_sequence_length = round(mean(sequence_length), 2)
        )

      return(retention_summary)
    },

    calculate_possession_zones = function(positions) {
      # Define field zones; include.lowest = TRUE keeps x == 0 in the
      # defensive third instead of turning it into NA
      zone_possession <- positions %>%
        mutate(
          zone = cut(x,
            breaks = c(0, 33, 66, 100),
            labels = c('Defensive Third', 'Middle Third', 'Attacking Third'),
            include.lowest = TRUE
          )
        ) %>%
        group_by(team, zone) %>%
        summarise(n_events = n(), .groups = 'drop') %>%
        group_by(team) %>%
        mutate(
          total_events = sum(n_events),
          zone_pct = round(n_events / total_events * 100, 2)
        )

      return(zone_possession)
    },

    visualize_possession_flow = function(selected_team) {
      # Calculate possession by minute, using the same possession events as
      # calculate_possession_percentage (shots and tackles are excluded)
      possession_flow <- self$data %>%
        filter(event_type %in% self$possession_event_types) %>%
        mutate(minute = floor(timestamp / 60)) %>%
        group_by(minute, team) %>%
        summarise(n_events = n(), .groups = 'drop') %>%
        group_by(minute) %>%
        mutate(
          total_events = sum(n_events),
          possession_pct = n_events / total_events * 100
        ) %>%
        filter(team == selected_team)

      # Create plot
      p <- ggplot(possession_flow, aes(x = minute, y = possession_pct)) +
        geom_line(color = '#1f77b4', size = 1.2) +
        geom_ribbon(aes(ymin = 0, ymax = possession_pct),
                    fill = '#1f77b4', alpha = 0.3) +
        geom_hline(yintercept = 50, color = 'red',
                   linetype = 'dashed', alpha = 0.5) +
        labs(
          title = paste('Possession Flow -', selected_team),
          x = 'Match Minute',
          y = 'Possession %'
        ) +
        theme_minimal() +
        theme(
          plot.title = element_text(face = 'bold', size = 14),
          axis.title = element_text(face = 'bold', size = 12)
        )

      return(p)
    }
  )
)

# Example usage
set.seed(42)

# Generate sample match data
n_events <- 1000
teams <- c('Team A', 'Team B')

match_data <- tibble(
  team = sample(teams, n_events, replace = TRUE, prob = c(0.55, 0.45)),
  event_type = sample(
    c('Pass', 'Carry', 'Dribble', 'Shot', 'Tackle'),
    n_events,
    replace = TRUE,
    prob = c(0.6, 0.2, 0.1, 0.05, 0.05)
  ),
  timestamp = cumsum(runif(n_events, 1, 5))
) %>%
  mutate(
    # Per-row completion probability, so each team gets its own rate
    completion_prob = if_else(team == 'Team A', 0.85, 0.78),
    # Only passes can be incomplete; every other event is 'Complete'
    outcome = if_else(
      event_type == 'Pass' & runif(n()) >= completion_prob,
      'Incomplete',
      'Complete'
    )
  ) %>%
  select(-completion_prob)

# Analyze possession metrics
analyzer <- PossessionAnalyzer$new(match_data)

cat("Possession Percentage:\n")
print(analyzer$calculate_possession_percentage())

cat("\nPass Completion:\n")
print(analyzer$calculate_pass_completion())

cat("\nBall Retention Index:\n")
print(analyzer$calculate_ball_retention_index())

# Visualize
p <- analyzer$visualize_possession_flow('Team A')
ggsave('possession_flow_r.png', p, width = 12, height = 6, dpi = 300)
cat("\nPossession flow chart saved\n")
```

## Best Practices

1. **Context Matters**: Consider match context (score, tactics, opponent)
2. **Quality Over Quantity**: High possession without chances is ineffective
3. **Zone Analysis**: Possession location is as important as percentage
4. **Temporal Analysis**: Track possession trends throughout the match
5. **Comparative Benchmarking**: Compare against team averages and opponents

## Practical Applications

- **Match Analysis**: Evaluate team performance and tactical execution
- **Opponent Scouting**: Identify possession-based playing styles
- **Player Evaluation**: Assess individual contribution to ball retention
- **Tactical Planning**: Design strategies based on possession profiles
- **Performance Tracking**: Monitor possession trends across the season

Passing Networks (Soccer) Next

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.

Table of Contents

Possession Metrics

Test Your Knowledge

Discussion