Play-by-Play Data Handling
Beginner
10 min read
1 view
Nov 27, 2025
# Play-by-Play Data Handling
## Introduction
Play-by-play (PBP) data captures every event in a basketball game, including shots, turnovers, fouls, and substitutions. This guide covers parsing and analyzing PBP data.
## Python Implementation
```python
import pandas as pd
import numpy as np
from datetime import datetime
class PlayByPlayProcessor:
    """Parse and summarise play-by-play (PBP) event data held in a DataFrame.

    The DataFrame is stored by reference (not copied). Each method reads the
    columns it needs (``EVENTMSGTYPE``, ``EVENTMSGACTIONTYPE``,
    ``PLAYER1_ID``, ...) and raises ``KeyError`` if a required one is missing.
    """

    def __init__(self, pbp_data):
        """Initialize with play-by-play DataFrame"""
        self.data = pbp_data
        # EVENTMSGTYPE code -> readable label. Codes missing from this table
        # become NaN when mapped in parse_events().
        self.event_types = {
            1: 'FIELD_GOAL_MADE',
            2: 'FIELD_GOAL_MISSED',
            3: 'FREE_THROW',
            4: 'REBOUND',
            5: 'TURNOVER',
            6: 'FOUL',
            8: 'SUBSTITUTION',
            10: 'JUMP_BALL',
            12: 'START_PERIOD',
            13: 'END_PERIOD',
        }

    def parse_events(self):
        """Parse and categorize play-by-play events.

        Adds an ``event_type_name`` column to the wrapped DataFrame in place
        (the caller's frame is modified) and returns that same frame.
        """
        self.data['event_type_name'] = self.data['EVENTMSGTYPE'].map(self.event_types)
        return self.data

    def calculate_possessions(self, ft_weight=0.4):
        """Estimate number of possessions.

        Uses ``FGA + ft_weight * FTA - ORB + TOV``, counting rows by event
        code.

        Args:
            ft_weight: Weight applied to free-throw attempts. Defaults to 0.4,
                the value this formula has always used here.

        Returns:
            The estimated possession count as a float.
        """
        event_type = self.data['EVENTMSGTYPE']
        fga = int(event_type.isin([1, 2]).sum())
        fta = int((event_type == 3).sum())
        # NOTE(review): rebounds with EVENTMSGACTIONTYPE == 0 are counted as
        # offensive rebounds -- confirm this encoding against the data feed.
        orb = int(((event_type == 4) & (self.data['EVENTMSGACTIONTYPE'] == 0)).sum())
        tov = int((event_type == 5).sum())
        return fga + ft_weight * fta - orb + tov

    def get_lineup_stints(self):
        """Extract lineup stints from substitution events.

        Walks substitution rows (event code 8) in frame order, removing
        ``PLAYER1_ID`` from and adding ``PLAYER2_ID`` to a running set. The
        set starts empty, so only players seen entering via a substitution
        ever appear in it.

        Returns:
            DataFrame with columns ``time``, ``period`` and ``lineup`` (a set
            snapshot), one row per substitution. The columns are present even
            when there are no substitutions.
        """
        subs = self.data[self.data['EVENTMSGTYPE'] == 8]
        stints = []
        current_lineup = set()
        for _, row in subs.iterrows():
            # Player out (no-op if the player was never recorded as on court)
            current_lineup.discard(row['PLAYER1_ID'])
            # Player in
            if pd.notna(row['PLAYER2_ID']):
                current_lineup.add(row['PLAYER2_ID'])
            stints.append({
                'time': row['PCTIMESTRING'],
                'period': row['PERIOD'],
                # Snapshot: the running set keeps mutating on later rows.
                'lineup': current_lineup.copy(),
            })
        return pd.DataFrame(stints, columns=['time', 'period', 'lineup'])

    def calculate_shooting_metrics(self, player_id=None):
        """Calculate shooting efficiency metrics.

        Args:
            player_id: If not None, only shots whose ``PLAYER1_ID`` equals
                this value are counted. Falsy ids such as 0 are honoured.

        Returns:
            dict with ``total_shots``, ``made_shots``, ``fg_pct``,
            ``three_pt_attempts``, ``three_pt_made`` and ``three_pt_pct``.
            Percentages are 0 when there are no matching attempts.
        """
        is_shot = self.data['EVENTMSGTYPE'].isin([1, 2])
        if player_id is not None:
            is_shot = is_shot & (self.data['PLAYER1_ID'] == player_id)
        shots = self.data[is_shot]

        made = shots['EVENTMSGTYPE'] == 1
        # NOTE(review): action types 1 and 2 are treated as three-point
        # attempts -- confirm against the feed's action-type table.
        is_three = shots['EVENTMSGACTIONTYPE'].isin([1, 2])
        three_made = made[is_three]

        total_shots = len(shots)
        three_attempts = int(is_three.sum())
        return {
            'total_shots': total_shots,
            'made_shots': int(made.sum()),
            # mean() of an empty Series is NaN; report 0 like three_pt_pct.
            'fg_pct': made.mean() * 100 if total_shots > 0 else 0,
            'three_pt_attempts': three_attempts,
            'three_pt_made': int(three_made.sum()),
            'three_pt_pct': three_made.mean() * 100 if three_attempts > 0 else 0,
        }

    def create_shot_chart_data(self):
        """Prepare data for shot chart visualization.

        Returns:
            DataFrame of field-goal attempts with columns ``PLAYER1_NAME``,
            ``x``, ``y``, ``made`` and ``EVENTMSGACTIONTYPE``. ``x``/``y`` are
            copied from ``LOC_X``/``LOC_Y`` when those columns exist and are
            NaN otherwise.
        """
        shots = self.data[self.data['EVENTMSGTYPE'].isin([1, 2])].copy()
        shots['made'] = shots['EVENTMSGTYPE'] == 1
        # Coordinates are optional in the input; fall back to NaN.
        shots['x'] = shots['LOC_X'] if 'LOC_X' in shots.columns else np.nan
        shots['y'] = shots['LOC_Y'] if 'LOC_Y' in shots.columns else np.nan
        return shots[['PLAYER1_NAME', 'x', 'y', 'made', 'EVENTMSGACTIONTYPE']]
# Usage example
# pbp = PlayByPlayProcessor(pbp_df)
# pbp.parse_events()
# metrics = pbp.calculate_shooting_metrics(player_id=201935)
# print(metrics)
```
## R Implementation
```r
library(dplyr)
library(tidyr)
library(stringr)
# Add a readable `event_type_name` column to a play-by-play data frame.
#
# pbp_data: data frame with an EVENTMSGTYPE column of event codes.
# Returns pbp_data with one extra character column; codes that are not in
# the lookup table become NA.
process_play_by_play <- function(pbp_data) {
  # Lookup table: EVENTMSGTYPE code (as character) -> readable label.
  event_types <- c(
    "1" = "FIELD_GOAL_MADE",
    "2" = "FIELD_GOAL_MISSED",
    "3" = "FREE_THROW",
    "4" = "REBOUND",
    "5" = "TURNOVER",
    "6" = "FOUL",
    "8" = "SUBSTITUTION",
    "10" = "JUMP_BALL",
    "12" = "START_PERIOD",
    "13" = "END_PERIOD"
  )
  pbp_data %>%
    mutate(
      # Indexing a named vector returns a named vector; unname() keeps the
      # lookup keys from being carried along as names on the new column.
      event_type_name = unname(event_types[as.character(EVENTMSGTYPE)])
    )
}
# Estimate possessions as FGA + 0.4 * FTA - ORB + TOV, counting rows of
# pbp_data by EVENTMSGTYPE (and EVENTMSGACTIONTYPE for rebounds).
calculate_possessions <- function(pbp_data) {
  # Number of rows of pbp_data that satisfy the given filter conditions.
  n_events <- function(...) {
    nrow(filter(pbp_data, ...))
  }

  fga <- n_events(EVENTMSGTYPE %in% c(1, 2))
  fta <- n_events(EVENTMSGTYPE == 3)
  orb <- n_events(EVENTMSGTYPE == 4, EVENTMSGACTIONTYPE == 0)
  tov <- n_events(EVENTMSGTYPE == 5)

  fga + 0.4 * fta - orb + tov
}
# Summarise field-goal shooting from play-by-play rows.
#
# pbp_data:  data frame with EVENTMSGTYPE, EVENTMSGACTIONTYPE and (when
#            filtering by player) PLAYER1_ID columns.
# player_id: optional; when not NULL only rows whose PLAYER1_ID equals it
#            are counted.
# Returns a list with total_shots, made_shots, fg_pct, three_pt_attempts,
# three_pt_made and three_pt_pct. Percentages are 0 when there are no
# matching attempts.
calculate_shooting_metrics <- function(pbp_data, player_id = NULL) {
  shots <- pbp_data %>%
    filter(EVENTMSGTYPE %in% c(1, 2))
  if (!is.null(player_id)) {
    # `!!` injects the argument's value so that a data column that happens to
    # be called `player_id` cannot shadow it inside filter().
    shots <- shots %>% filter(PLAYER1_ID == !!player_id)
  }
  shots <- shots %>%
    mutate(
      # NOTE(review): action types 1 and 2 are treated as three-point
      # attempts -- confirm against the feed's action-type table.
      is_three = EVENTMSGACTIONTYPE %in% c(1, 2),
      made = EVENTMSGTYPE == 1
    )
  metrics <- list(
    total_shots = nrow(shots),
    made_shots = sum(shots$made),
    # mean() of a zero-length vector is NaN; report 0 like three_pt_pct.
    fg_pct = if (nrow(shots) > 0) {
      mean(shots$made) * 100
    } else {
      0
    },
    three_pt_attempts = sum(shots$is_three),
    three_pt_made = sum(shots$is_three & shots$made),
    three_pt_pct = if (sum(shots$is_three) > 0) {
      mean(shots$made[shots$is_three]) * 100
    } else {
      0
    }
  )
  return(metrics)
}
# Usage
# pbp_processed <- process_play_by_play(pbp_df)
# metrics <- calculate_shooting_metrics(pbp_processed, player_id = 201935)
```
## Key PBP Event Types
1. **Scoring Events** - Field goals and free throws
2. **Rebounds** - Offensive and defensive
3. **Turnovers** - Lost balls, bad passes, violations
4. **Fouls** - Personal, shooting, technical
5. **Substitutions** - Player rotations
## Advanced Analysis
- Possession-based metrics
- Lineup analysis
- Clutch performance (last 5 min, close games)
- Shot sequencing
- Play type classification
Discussion
Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.
Table of Contents
Related Topics
Quick Actions