Play-by-Play Data Handling

Beginner 10 min read 0 views Nov 27, 2025
# Play-by-Play Data Handling

## Introduction

Play-by-play (PBP) data captures every event in a basketball game, including shots, turnovers, fouls, and substitutions. This guide covers parsing and analyzing PBP data.

## Python Implementation

```python
import pandas as pd
import numpy as np
from datetime import datetime


class PlayByPlayProcessor:
    """Parse and summarize a play-by-play (PBP) event log.

    The wrapped DataFrame must provide the columns each method reads:
    ``EVENTMSGTYPE``, ``EVENTMSGACTIONTYPE``, ``PLAYER1_ID``,
    ``PLAYER2_ID``, ``PLAYER1_NAME``, ``PERIOD`` and ``PCTIMESTRING``.
    ``LOC_X`` / ``LOC_Y`` are optional.
    """

    def __init__(self, pbp_data):
        """Initialize with play-by-play DataFrame"""
        self.data = pbp_data
        # Numeric EVENTMSGTYPE code -> readable event name.
        self.event_types = {
            1: 'FIELD_GOAL_MADE',
            2: 'FIELD_GOAL_MISSED',
            3: 'FREE_THROW',
            4: 'REBOUND',
            5: 'TURNOVER',
            6: 'FOUL',
            8: 'SUBSTITUTION',
            10: 'JUMP_BALL',
            12: 'START_PERIOD',
            13: 'END_PERIOD',
        }

    def parse_events(self):
        """Parse and categorize play-by-play events.

        Adds an ``event_type_name`` column to the wrapped DataFrame in
        place and returns that DataFrame. Codes that are missing from
        ``self.event_types`` map to NaN.
        """
        self.data['event_type_name'] = (
            self.data['EVENTMSGTYPE'].map(self.event_types)
        )
        return self.data

    def calculate_possessions(self):
        """Estimate number of possessions.

        Returns:
            ``FGA + 0.4 * FTA - ORB + TOV`` computed over every row of
            the wrapped DataFrame (both teams combined), as a float.
        """
        event_type = self.data['EVENTMSGTYPE']

        # Possessions = FGA + 0.4*FTA - ORB + TOV
        fga = int(event_type.isin([1, 2]).sum())
        fta = int((event_type == 3).sum())
        # NOTE(review): this treats rebounds with action type 0 as
        # offensive rebounds -- confirm against the feed's code table.
        orb = int(
            ((event_type == 4)
             & (self.data['EVENTMSGACTIONTYPE'] == 0)).sum()
        )
        tov = int((event_type == 5).sum())

        return fga + 0.4 * fta - orb + tov

    def get_lineup_stints(self, starting_lineup=None):
        """Extract lineup stints from substitution events.

        Args:
            starting_lineup: Optional iterable of player ids on the floor
                before the first substitution. When omitted the lineup
                starts empty, so it only ever contains players who have
                been subbed in.

        Returns:
            DataFrame with one row per substitution and the columns
            ``time``, ``period`` and ``lineup`` (a set of player ids
            after the substitution was applied). The columns are present
            even when there are no substitution events.
        """
        subs = self.data[self.data['EVENTMSGTYPE'] == 8]
        current_lineup = (
            set(starting_lineup) if starting_lineup is not None else set()
        )

        stints = []
        for _, row in subs.iterrows():
            # Player out (no-op when the player was never tracked).
            current_lineup.discard(row['PLAYER1_ID'])

            # Player in
            if pd.notna(row['PLAYER2_ID']):
                current_lineup.add(row['PLAYER2_ID'])

            stints.append({
                'time': row['PCTIMESTRING'],
                'period': row['PERIOD'],
                # Snapshot: the working set keeps mutating.
                'lineup': current_lineup.copy(),
            })

        return pd.DataFrame(stints, columns=['time', 'period', 'lineup'])

    def calculate_shooting_metrics(self, player_id=None):
        """Calculate shooting efficiency metrics.

        Args:
            player_id: Restrict to shots whose ``PLAYER1_ID`` equals this
                id. ``None`` (the default) keeps every shot.

        Returns:
            dict with ``total_shots``, ``made_shots``, ``fg_pct``,
            ``three_pt_attempts``, ``three_pt_made`` and
            ``three_pt_pct``. Percentages are 0 when there are no
            attempts of that kind.
        """
        shots = self.data[self.data['EVENTMSGTYPE'].isin([1, 2])]

        # `is not None` so that a falsy id such as 0 still filters.
        if player_id is not None:
            shots = shots[shots['PLAYER1_ID'] == player_id]

        # Identify shot types
        # NOTE(review): three-pointers are inferred from action types
        # 1 and 2 -- confirm against the feed's code table.
        is_three = shots['EVENTMSGACTIONTYPE'].isin([1, 2])
        made = shots['EVENTMSGTYPE'] == 1
        three_made = made[is_three]

        total_shots = len(shots)
        three_attempts = int(is_three.sum())

        return {
            'total_shots': total_shots,
            'made_shots': int(made.sum()),
            'fg_pct': float(made.mean() * 100) if total_shots else 0.0,
            'three_pt_attempts': three_attempts,
            'three_pt_made': int(three_made.sum()),
            'three_pt_pct': (
                float(three_made.mean() * 100) if three_attempts else 0.0
            ),
        }

    def create_shot_chart_data(self):
        """Prepare data for shot chart visualization.

        Returns:
            DataFrame of field-goal attempts with the columns
            ``PLAYER1_NAME``, ``x``, ``y``, ``made`` and
            ``EVENTMSGACTIONTYPE``. ``x`` / ``y`` are NaN when the
            source has no ``LOC_X`` / ``LOC_Y`` column.
        """
        shots = self.data[self.data['EVENTMSGTYPE'].isin([1, 2])].copy()

        shots['made'] = shots['EVENTMSGTYPE'] == 1
        # Use the location columns if available
        shots['x'] = shots['LOC_X'] if 'LOC_X' in shots.columns else np.nan
        shots['y'] = shots['LOC_Y'] if 'LOC_Y' in shots.columns else np.nan

        return shots[['PLAYER1_NAME', 'x', 'y', 'made', 'EVENTMSGACTIONTYPE']]


# Usage example
# pbp = PlayByPlayProcessor(pbp_df)
# pbp.parse_events()
# metrics = pbp.calculate_shooting_metrics(player_id=201935)
# print(metrics)
```

## R Implementation

```r
library(dplyr)
library(tidyr)
library(stringr)

# Add a readable event_type_name column based on EVENTMSGTYPE.
process_play_by_play <- function(pbp_data) {
  event_types <- c(
    "1" = "FIELD_GOAL_MADE",
    "2" = "FIELD_GOAL_MISSED",
    "3" = "FREE_THROW",
    "4" = "REBOUND",
    "5" = "TURNOVER",
    "6" = "FOUL",
    "8" = "SUBSTITUTION",
    "10" = "JUMP_BALL",
    "12" = "START_PERIOD",
    "13" = "END_PERIOD"
  )

  pbp_data %>%
    mutate(
      event_type_name = event_types[as.character(EVENTMSGTYPE)]
    )
}

# Estimate possessions as FGA + 0.4 * FTA - ORB + TOV.
calculate_possessions <- function(pbp_data) {
  fga <- pbp_data %>%
    filter(EVENTMSGTYPE %in% c(1, 2)) %>%
    nrow()

  fta <- pbp_data %>%
    filter(EVENTMSGTYPE == 3) %>%
    nrow()

  # NOTE(review): this treats rebounds with action type 0 as offensive
  # rebounds -- confirm against the feed's code table.
  orb <- pbp_data %>%
    filter(EVENTMSGTYPE == 4, EVENTMSGACTIONTYPE == 0) %>%
    nrow()

  tov <- pbp_data %>%
    filter(EVENTMSGTYPE == 5) %>%
    nrow()

  possessions <- fga + 0.4 * fta - orb + tov
  return(possessions)
}

# Shooting metrics for all shots, or for one player when player_id is given.
# Percentages are 0 when there are no attempts of that kind.
calculate_shooting_metrics <- function(pbp_data, player_id = NULL) {
  shots <- pbp_data %>%
    filter(EVENTMSGTYPE %in% c(1, 2))

  if (!is.null(player_id)) {
    shots <- shots %>% filter(PLAYER1_ID == player_id)
  }

  # NOTE(review): three-pointers are inferred from action types 1 and 2 --
  # confirm against the feed's code table.
  shots <- shots %>%
    mutate(
      is_three = EVENTMSGACTIONTYPE %in% c(1, 2),
      made = EVENTMSGTYPE == 1
    )

  n_shots <- nrow(shots)
  n_threes <- sum(shots$is_three)

  metrics <- list(
    total_shots = n_shots,
    made_shots = sum(shots$made),
    # mean() of an empty vector is NaN, so guard the empty case.
    fg_pct = if (n_shots > 0) mean(shots$made) * 100 else 0,
    three_pt_attempts = n_threes,
    three_pt_made = sum(shots$is_three & shots$made),
    three_pt_pct = if (n_threes > 0) {
      mean(shots$made[shots$is_three]) * 100
    } else {
      0
    }
  )

  return(metrics)
}

# Usage
# pbp_processed <- process_play_by_play(pbp_df)
# metrics <- calculate_shooting_metrics(pbp_processed, player_id = 201935)
```

## Key PBP Event Types

1. **Scoring Events** - Field goals and free throws
2. **Rebounds** - Offensive and defensive
3. **Turnovers** - Lost balls, bad passes, violations
4. **Fouls** - Personal, shooting, technical
5. **Substitutions** - Player rotations

## Advanced Analysis

- Possession-based metrics
- Lineup analysis
- Clutch performance (last 5 min, close games)
- Shot sequencing
- Play type classification

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.