Accessing WNBA Data Sources
Beginner
10 min read
1 view
Nov 27, 2025
WNBA Data Landscape
Accessing quality WNBA data has historically been more challenging than NBA data, but the landscape has improved significantly in recent years. Modern tools and packages now provide programmatic access to play-by-play, box scores, player tracking, and advanced statistics.
Primary WNBA Data Sources
- ESPN API: Play-by-play, box scores, team/player stats
- WNBA Stats: Official league statistics (stats.wnba.com)
- Basketball Reference: Historical data and advanced metrics
- wehoop Package (R): Comprehensive R interface to WNBA data
- Synergy Sports: Advanced tracking data (subscription required)
Python: Accessing WNBA Data
Python: WNBA Data Collection Framework
import pandas as pd
import requests
import json
from datetime import datetime, timedelta
class WNBADataCollector:
    """Comprehensive WNBA data collection class.

    Thin wrapper around the ESPN site API rooted at ``wnba_endpoint``.
    Every fetch method returns ``None`` when ESPN answers with a non-200
    status; network-level errors raised by ``requests`` (including
    timeouts) propagate to the caller.
    """

    # Seconds to wait on ESPN before giving up. Without an explicit timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.base_url = "https://site.api.espn.com/apis/site/v2/sports"
        self.wnba_endpoint = f"{self.base_url}/basketball/wnba"

    def _get_json(self, path, params=None):
        """GET ``<wnba_endpoint>/<path>`` and return the decoded JSON body.

        Args:
            path: Endpoint path relative to ``wnba_endpoint`` (e.g. "teams").
            params: Optional dict of query-string parameters.

        Returns:
            The parsed JSON payload, or ``None`` for any non-200 response.
        """
        response = requests.get(
            f"{self.wnba_endpoint}/{path}",
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code == 200:
            return response.json()
        return None

    @staticmethod
    def _first_dict(items):
        """Return ``items[0]`` if it is a dict, otherwise an empty dict.

        ``payload.get(key, [{}])[0]`` raises IndexError when the key is
        present but holds an empty list; this helper tolerates missing,
        null and empty values alike.
        """
        if isinstance(items, list) and items and isinstance(items[0], dict):
            return items[0]
        return {}

    @staticmethod
    def _label_stats(raw_stats, labels):
        """Turn one player's raw stats into a ``{stat_name: value}`` dict.

        Args:
            raw_stats: Either a mapping of stat name to value, or a
                positional sequence of values.
            labels: Column names for the positional form, matched by index.
                Values without a label are stored as ``stat_<index>``.
        """
        if isinstance(raw_stats, dict):
            return dict(raw_stats)
        labelled = {}
        for index, value in enumerate(raw_stats or []):
            name = labels[index] if index < len(labels) else f"stat_{index}"
            labelled[name] = value
        return labelled

    def get_season_scoreboard(self, season=2024):
        """Get all games for a WNBA season.

        Args:
            season: Value sent as the scoreboard ``dates`` query parameter.

        Returns:
            The scoreboard JSON payload, or ``None`` on a non-200 response.
        """
        return self._get_json(
            "scoreboard",
            params={'limit': 1000, 'dates': season},
        )

    def get_team_stats(self, season=2024):
        """Fetch the WNBA team directory as a DataFrame.

        Args:
            season: Accepted for interface compatibility; it is not sent
                to the ``teams`` endpoint.

        Returns:
            DataFrame with columns ``team_id``, ``name``, ``abbreviation``
            and ``location`` (empty when the payload lists no teams), or
            ``None`` on a non-200 response.
        """
        teams_data = self._get_json("teams")
        if teams_data is None:
            return None

        # Teams are nested under sports[0].leagues[0].teams.
        sport = self._first_dict(teams_data.get('sports'))
        league = self._first_dict(sport.get('leagues'))

        teams_list = []
        for team in league.get('teams') or []:
            team_info = team.get('team') or {}
            teams_list.append({
                'team_id': team_info.get('id'),
                'name': team_info.get('displayName'),
                'abbreviation': team_info.get('abbreviation'),
                'location': team_info.get('location'),
            })
        return pd.DataFrame(teams_list)

    def get_player_stats(self, season=2024):
        """Fetch the athlete listing as a DataFrame.

        Despite the name, this returns identifying fields only; per-game
        statistics come from ``extract_box_score``.

        Args:
            season: Accepted for interface compatibility; it is not sent
                to the ``athletes`` endpoint.

        Returns:
            DataFrame with columns ``player_id``, ``name``, ``position``
            and ``team``, or ``None`` on a non-200 response.
        """
        # ESPN doesn't have a direct player stats endpoint for WNBA.
        # Alternative: scrape from games or use Basketball Reference.
        data = self._get_json("athletes")
        if data is None:
            return None

        players_list = []
        for athlete in data.get('athletes') or []:
            # ``or {}`` also covers keys that are present but null, which
            # ``.get(key, {})`` would pass through as None.
            position = athlete.get('position') or {}
            team = athlete.get('team') or {}
            players_list.append({
                'player_id': athlete.get('id'),
                'name': athlete.get('fullName'),
                'position': position.get('abbreviation'),
                'team': team.get('abbreviation'),
            })
        return pd.DataFrame(players_list)

    def get_game_details(self, game_id):
        """Get the ESPN game summary (box score, play-by-play) for one game.

        Args:
            game_id: ESPN event id, sent as the ``event`` query parameter.

        Returns:
            The summary JSON payload, or ``None`` on a non-200 response.
        """
        return self._get_json("summary", params={'event': game_id})

    def extract_box_score(self, game_id):
        """Extract the player box score for one game.

        Args:
            game_id: ESPN event id.

        Returns:
            DataFrame with one row per player: ``game_id``, ``team``,
            ``player``, ``player_id`` plus one column per statistic.
            ``None`` when the game summary could not be fetched.
        """
        game_data = self.get_game_details(game_id)
        if not game_data:
            return None

        box_score = game_data.get('boxscore') or {}
        all_player_stats = []
        for team in box_score.get('players') or []:
            team_abbr = (team.get('team') or {}).get('abbreviation')
            stat_group = self._first_dict(team.get('statistics'))
            # Assumes the stat group names its positional values under
            # 'labels' (falling back to 'names' / 'keys') -- verify against
            # a live payload.
            labels = (
                stat_group.get('labels')
                or stat_group.get('names')
                or stat_group.get('keys')
                or []
            )
            for player in stat_group.get('athletes') or []:
                athlete = player.get('athlete') or {}
                row = {
                    'game_id': game_id,
                    'team': team_abbr,
                    'player': athlete.get('displayName'),
                    'player_id': athlete.get('id'),
                }
                # Indexing the stats list by its own elements (as the
                # previous code did) raises TypeError; pair values with
                # their labels instead.
                row.update(self._label_stats(player.get('stats'), labels))
                all_player_stats.append(row)
        return pd.DataFrame(all_player_stats)
# Example usage: list the teams, pull the 2024 scoreboard, then show the
# box score of the first game returned.
collector = WNBADataCollector()

# Get team data (None when the request fails)
teams = collector.get_team_stats(season=2024)
print("WNBA Teams:")
print(teams)

# Get the scoreboard for the whole season
scoreboard = collector.get_season_scoreboard(season=2024)
if scoreboard and 'events' in scoreboard:
    games = scoreboard['events']
    print(f"\nFound {len(games)} games in 2024 season")

    # Get box score for first game
    if games:
        first_game_id = games[0]['id']
        box_score = collector.extract_box_score(first_game_id)
        # extract_box_score returns None when the game summary cannot be
        # fetched; calling .head() on it would raise AttributeError.
        if box_score is not None:
            print("\nBox Score Sample:")
            print(box_score.head())
        else:
            print(f"\nBox score unavailable for game {first_game_id}")
# Advanced: Calculate team efficiency metrics
def calculate_team_efficiency(box_scores_df):
    """Aggregate player box scores into per-team shooting and passing metrics.

    Args:
        box_scores_df: DataFrame with a ``team`` grouping key and the stat
            columns PTS, FGM, FGA, FG3M, FTM, FTA, REB, AST and TOV. Values
            may be numbers or numeric strings; anything that cannot be
            parsed as a number is treated as missing and skipped by the sum.

    Returns:
        DataFrame with one row per team holding the summed stat columns plus:
        ``FG_PCT`` (FGM / FGA), ``TS_PCT`` (PTS / (2 * (FGA + 0.44 * FTA)))
        and ``AST_TO_RATIO`` (AST / TOV). A zero denominator yields NaN or
        inf rather than raising.

    Raises:
        KeyError: If any required stat column is missing.
    """
    stat_columns = ['PTS', 'FGM', 'FGA', 'FG3M', 'FTM', 'FTA', 'REB', 'AST', 'TOV']

    missing = [col for col in stat_columns if col not in box_scores_df.columns]
    if missing:
        raise KeyError(f"box_scores_df is missing required column(s): {missing}")

    # Scraped box scores frequently arrive as strings. Summing strings
    # concatenates them ("10" + "20" -> "1020") and the divisions below then
    # fail, so coerce to numbers before aggregating. Work on a copy so the
    # caller's frame is left untouched.
    totals = box_scores_df.copy()
    totals[stat_columns] = totals[stat_columns].apply(pd.to_numeric, errors='coerce')

    team_stats = totals.groupby('team')[stat_columns].sum().reset_index()

    # Calculate efficiency metrics
    team_stats['FG_PCT'] = team_stats['FGM'] / team_stats['FGA']
    # True shooting: 0.44 * FTA approximates possessions ended by free throws.
    team_stats['TS_PCT'] = (
        team_stats['PTS'] /
        (2 * (team_stats['FGA'] + 0.44 * team_stats['FTA']))
    )
    team_stats['AST_TO_RATIO'] = team_stats['AST'] / team_stats['TOV']
    return team_stats
# Recap of the available WNBA data access routes, written out in one call.
print("\n".join([
    "\n=== WNBA Data Access Methods ===",
    "1. ESPN API - Play-by-play and box scores",
    "2. Direct web scraping - Basketball Reference",
    "3. Manual data collection - WNBA Stats website",
    "4. R wehoop package - Most comprehensive for WNBA",
]))
R: WNBA Data Access with wehoop
library(wehoop)
library(tidyverse)
library(lubridate)
# The wehoop package provides the most comprehensive WNBA data access in R
# =============================================================================
# 1. Get WNBA Team Information
# =============================================================================
# Fetch the team lookup table, then show only the identifying columns.
wnba_teams <- wehoop::wnba_teams()

cat("WNBA Teams:\n")
wnba_teams %>%
  select(display_name, abbreviation, location, color) %>%
  print()
# =============================================================================
# 2. Load Play-by-Play Data
# =============================================================================
# Load WNBA play-by-play for the 2024 season
pbp_2024 <- wehoop::load_wnba_pbp(seasons = 2024)

cat("\nPlay-by-Play Data Structure:\n")
# glimpse() prints the column summary itself and returns its input invisibly;
# wrapping it in print() would additionally dump the whole tibble.
glimpse(pbp_2024)

# Sample of play-by-play data: first ten events with the running score
cat("\nSample Plays:\n")
pbp_2024 %>%
  select(game_date, home_team, away_team, type_text,
         score_value, home_score, away_score) %>%
  head(10) %>%
  print()
# =============================================================================
# 3. Load Team Box Scores
# =============================================================================
# One row per team per game for the 2024 season.
team_box <- wehoop::load_wnba_team_box(seasons = 2024)

cat("\nTeam Box Score Summary:\n")
team_box %>%
  select(
    game_date, team_display_name, team_winner,
    field_goals_made, field_goals_attempted,
    three_point_field_goals_made, total_rebounds,
    assists, turnovers
  ) %>%
  head(10) %>%
  print()
# =============================================================================
# 4. Load Player Box Scores
# =============================================================================
# One row per player per game for the 2024 season.
player_box <- wehoop::load_wnba_player_box(seasons = 2024)

# Show the ten highest single-game scoring lines.
cat("\nPlayer Box Score Sample:\n")
player_box %>%
  select(
    athlete_display_name, team_display_name,
    minutes, points, rebounds, assists, steals, blocks
  ) %>%
  arrange(desc(points)) %>%
  head(10) %>%
  print()
# =============================================================================
# 5. Advanced Data Processing
# =============================================================================
# Calculate team efficiency metrics: per-game averages plus season-level
# shooting percentages computed from summed makes and attempts.
team_efficiency <- team_box %>%
  group_by(team_display_name) %>%
  summarise(
    games = n(),
    ppg = mean(team_score, na.rm = TRUE),
    fg_pct = sum(field_goals_made, na.rm = TRUE) /
      sum(field_goals_attempted, na.rm = TRUE),
    three_pt_pct = sum(three_point_field_goals_made, na.rm = TRUE) /
      sum(three_point_field_goals_attempted, na.rm = TRUE),
    ft_pct = sum(free_throws_made, na.rm = TRUE) /
      sum(free_throws_attempted, na.rm = TRUE),
    rpg = mean(total_rebounds, na.rm = TRUE),
    apg = mean(assists, na.rm = TRUE),
    tov_pg = mean(turnovers, na.rm = TRUE),
    # True shooting must be computed here: after summarise() the per-game
    # columns (team_score, field_goals_attempted, free_throws_attempted) no
    # longer exist, so a later mutate() referencing them fails.
    ts_pct = sum(team_score, na.rm = TRUE) /
      (2 * (sum(field_goals_attempted, na.rm = TRUE) +
              0.44 * sum(free_throws_attempted, na.rm = TRUE))),
    .groups = "drop"
  ) %>%
  arrange(desc(ppg))

cat("\nTeam Efficiency Rankings:\n")
print(team_efficiency)
# =============================================================================
# 6. Player Performance Analysis
# =============================================================================
# Season averages per player/team pairing, limited to players with at least
# ten games, ranked by points per game (top 20 kept).
top_scorers <- player_box %>%
  group_by(athlete_display_name, team_display_name) %>%
  summarise(
    games        = n(),
    total_points = sum(points, na.rm = TRUE),
    ppg          = mean(points, na.rm = TRUE),
    rpg          = mean(rebounds, na.rm = TRUE),
    apg          = mean(assists, na.rm = TRUE),
    fg_pct       = sum(field_goals_made) / sum(field_goals_attempted),
    .groups      = "drop"
  ) %>%
  filter(games >= 10) %>%
  arrange(desc(ppg)) %>%
  head(20)

cat("\nTop Scorers (min 10 games):\n")
print(top_scorers)
# =============================================================================
# 7. Game Schedule and Results
# =============================================================================
# Fetch the 2024 schedule and preview matchups, scores and completion status.
schedule <- wehoop::wnba_schedule(season = 2024)

cat("\nSchedule Information:\n")
schedule %>%
  select(
    game_date, home_display_name, away_display_name,
    home_score, away_score, status_type_completed
  ) %>%
  head(10) %>%
  print()
# =============================================================================
# 8. Visualization: Team Shooting Efficiency
# =============================================================================
# Scatter of FG% against 3P% per team; point size encodes points per game and
# each point is labelled with the team name.
ggplot(
  data = team_efficiency,
  mapping = aes(
    x = fg_pct,
    y = three_pt_pct,
    size = ppg,
    label = team_display_name
  )
) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_text(size = 3, hjust = -0.1) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
  theme_minimal() +
  labs(
    title = "WNBA Team Shooting Efficiency 2024",
    subtitle = "Field Goal % vs Three-Point %, sized by PPG",
    x = "Field Goal %",
    y = "Three-Point %",
    size = "Points Per Game"
  )
# Closing summary of what wehoop covers, emitted with a single cat() call.
cat(
  "\n=== wehoop Package Capabilities ===\n",
  "✓ Play-by-play data\n",
  "✓ Team and player box scores\n",
  "✓ Schedule and standings\n",
  "✓ Historical data (multiple seasons)\n",
  "✓ NCAA Women's Basketball data\n",
  sep = ""
)
Data Quality Considerations
WNBA data availability varies by source. ESPN and wehoop provide the most reliable access to play-by-play and box score data. Advanced tracking metrics (shot locations, defensive matchups) are far less widely available for the WNBA than for the NBA.
Best Practices for WNBA Data Collection
- Use wehoop in R for the most comprehensive and reliable access
- Combine multiple data sources to fill gaps in coverage
- Cache data locally to avoid repeated API calls
- Validate data completeness—some games may have missing play-by-play
- Consider Basketball Reference for historical statistics
Common Data Access Challenges
- Limited tracking data compared to the NBA (no Second Spectrum equivalent)
- Inconsistent API documentation for WNBA endpoints
- Historical data completeness varies by season
- Some advanced metrics require manual calculation
Discussion
Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.
Table of Contents
Related Topics
Quick Actions