Play-Calling Tendencies Analysis

Beginner · 10 min read · 0 views · Nov 27, 2025
# Play-Calling Tendencies Analysis

## Introduction

Play-calling tendency analysis identifies patterns in offensive and defensive decision-making. Understanding these tendencies allows teams to exploit predictability and adjust their game plans for competitive advantage.

## Core Concepts

### Tendency Categories

- **Down and Distance**: Play type by situation
- **Formation**: Personnel grouping preferences
- **Field Position**: Red zone vs midfield tendencies
- **Game Script**: Leading vs trailing play selection
- **Time and Score**: Clock management decisions

### Key Metrics

- **Run-Pass Ratio**: Balance by situation
- **Formation Usage**: Personnel grouping frequency
- **Play Direction**: Left, middle, right distribution
- **Tempo**: Plays per minute, no-huddle frequency
- **Predictability Index**: Entropy-based measure of tendency strength

## R Analysis with nflfastR

```r
library(nflfastR)
library(dplyr)
library(ggplot2)
library(tidyr)

# Load play-by-play data ----
pbp <- load_pbp(2023)

# Keep offensive snaps only: a possession team, a run or pass, a real down
plays <- pbp %>%
  filter(
    !is.na(posteam),
    play_type %in% c("run", "pass"),
    !is.na(down)
  )

# Run-pass tendencies by down and distance ----
# One row per team and situation, holding the pass rate (in percent) and the
# number of run + pass plays the rate is based on.
run_pass_tendencies <- plays %>%
  mutate(
    distance_category = case_when(
      down == 1 ~ "1st Down",
      down == 2 & ydstogo <= 5 ~ "2nd & Short",
      down == 2 & ydstogo > 5 ~ "2nd & Long",
      down == 3 & ydstogo <= 3 ~ "3rd & Short",
      down == 3 & ydstogo <= 7 ~ "3rd & Med",
      down == 3 & ydstogo > 7 ~ "3rd & Long",
      TRUE ~ "Other" # 4th downs are excluded below
    )
  ) %>%
  filter(distance_category != "Other") %>%
  group_by(posteam, distance_category, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(posteam, distance_category) %>%
  mutate(
    total_plays = sum(plays),
    play_rate = plays / total_plays * 100
  ) %>%
  # Drop the grouping so later steps (pivot, re-grouping) start from a
  # plain tibble
  ungroup() %>%
  filter(play_type == "pass") %>%
  select(posteam, distance_category, pass_rate = play_rate, total_plays)

# League pass rate by situation ----
# Weighting by play count makes this the pooled league rate rather than a
# simple average of team rates.
league_avg <- run_pass_tendencies %>%
  group_by(distance_category) %>%
  summarise(
    avg_pass_rate = weighted.mean(pass_rate, total_plays),
    .groups = "drop"
  )

# cat() is used for headings because print() would show "\n" literally
cat("League Average Pass Rate by Situation:\n")
print(league_avg)

# Plot pass tendencies by situation
ggplot(
  league_avg,
  aes(x = reorder(distance_category, avg_pass_rate), y = avg_pass_rate)
) +
  geom_col(fill = "steelblue", alpha = 0.7) +
  geom_text(
    aes(label = paste0(round(avg_pass_rate, 1), "%")),
    hjust = -0.2,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "NFL Pass Rate by Down and Distance - 2023",
    x = "Situation",
    y = "Pass Rate (%)",
    subtitle = "League averages across all teams"
  ) +
  theme_minimal()

# Team-specific tendency analysis ----
# Wide table: one row per team, one pass_rate_* / total_plays_* column per
# situation
team_tendencies <- run_pass_tendencies %>%
  pivot_wider(
    names_from = distance_category,
    values_from = c(pass_rate, total_plays)
  )

# Predictability score based on how much a team's pass rate moves between
# situations. The spread is measured with the standard deviation (square root
# of the variance): the variance is in squared percentage points, so
# "100 - variance" would usually be negative, while "100 - sd" stays on a
# 0-100 scale and ranks teams in exactly the same order.
team_predictability <- run_pass_tendencies %>%
  group_by(posteam) %>%
  summarise(
    avg_pass_rate = weighted.mean(pass_rate, total_plays),
    pass_rate_variance = var(pass_rate),
    total_plays = sum(total_plays),
    .groups = "drop"
  ) %>%
  mutate(
    # Lower spread across situations = higher score
    predictability = 100 - sqrt(pass_rate_variance),
    # Distance from a 50-50 run-pass balance
    tendency_strength = abs(50 - avg_pass_rate)
  ) %>%
  arrange(predictability)

# Sorted ascending, so the tail holds the highest predictability scores
cat("\nMost Predictable Offenses (Low Variance):\n")
print(team_predictability %>% tail(10))

cat("\nLeast Predictable Offenses (High Variance - Situationally Adaptive):\n")
print(team_predictability %>% head(10))

# Formation tendencies ----

# Map personnel strings such as "1 RB, 1 TE, 3 WR" to a grouping label.
# Each position count is matched on its own, so the result does not depend on
# the order in which positions appear in the string. NA input yields "Other".
classify_personnel <- function(personnel) {
  has_count <- function(count, position) {
    grepl(paste0("\\b", count, " ", position, "\\b"), personnel)
  }

  case_when(
    has_count(1, "RB") & has_count(3, "WR") ~ "11 Personnel",
    has_count(1, "RB") & has_count(2, "TE") & has_count(2, "WR") ~
      "12 Personnel",
    has_count(1, "RB") & has_count(3, "TE") & has_count(1, "WR") ~
      "13 Personnel",
    has_count(1, "RB") & has_count(4, "WR") ~ "10 Personnel",
    has_count(2, "RB") & has_count(1, "TE") ~ "21 Personnel",
    TRUE ~ "Other"
  )
}

# NOTE(review): load_pbp() output does not appear to contain a `personnel`
# column; nflverse participation data stores it as `offense_personnel` and
# would have to be joined onto `plays` first -- confirm against the data
# dictionary. Whichever of the two columns is present is used here.
personnel_col <- intersect(c("offense_personnel", "personnel"), names(plays))

if (length(personnel_col) > 0) {
  personnel_strings <- plays[[personnel_col[1]]]
} else {
  warning(
    "No personnel column found in `plays`; join participation data to ",
    "enable the formation analysis. Formation tables will be empty.",
    call. = FALSE
  )
  personnel_strings <- rep(NA_character_, nrow(plays))
}

# Plays with a recognised personnel grouping (shared by the tables below)
plays_by_personnel <- plays %>%
  mutate(personnel_group = classify_personnel(personnel_strings)) %>%
  filter(personnel_group != "Other")

# Run/pass split within each team and personnel grouping
formation_usage <- plays_by_personnel %>%
  group_by(posteam, personnel_group, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(posteam, personnel_group) %>%
  mutate(
    total_plays = sum(plays),
    usage_rate = plays / sum(plays) * 100
  ) %>%
  ungroup()

# Top 11 personnel users: share of all of a team's plays run from 11 personnel
personnel_11_usage <- formation_usage %>%
  filter(personnel_group == "11 Personnel") %>%
  group_by(posteam) %>%
  summarise(total_11_plays = sum(plays), .groups = "drop") %>%
  left_join(
    plays %>%
      group_by(posteam) %>%
      summarise(total_plays = n()),
    by = "posteam"
  ) %>%
  mutate(personnel_11_rate = total_11_plays / total_plays * 100) %>%
  arrange(desc(personnel_11_rate))

cat("\nTop 10 Teams - 11 Personnel Usage Rate:\n")
print(personnel_11_usage %>% head(10))

# Pass rate from different formations (team level, minimum 50 plays)
formation_pass_rate <- formation_usage %>%
  filter(play_type == "pass", total_plays >= 50) %>%
  select(posteam, personnel_group, pass_rate = usage_rate)

# League average pass rate by formation
formation_pass_avg <- plays_by_personnel %>%
  group_by(personnel_group, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(personnel_group) %>%
  mutate(pass_rate = plays / sum(plays) * 100) %>%
  ungroup() %>%
  filter(play_type == "pass")

cat("\nLeague Pass Rate by Personnel Grouping:\n")
print(formation_pass_avg)

# Score differential tendencies ----
# case_when() takes the first matching branch, so each threshold only needs
# its lower bound.
score_tendencies <- plays %>%
  mutate(
    score_diff_category = case_when(
      score_differential >= 17 ~ "Leading Big (+17)",
      score_differential >= 9 ~ "Leading Moderate (+9-16)",
      score_differential >= 1 ~ "Leading Small (+1-8)",
      score_differential == 0 ~ "Tied",
      score_differential >= -8 ~ "Trailing Small (-1 to -8)",
      score_differential >= -16 ~ "Trailing Moderate (-9 to -16)",
      TRUE ~ "Trailing Big (-17+)"
    )
  ) %>%
  group_by(score_diff_category, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(score_diff_category) %>%
  mutate(pass_rate = plays / sum(plays) * 100) %>%
  ungroup() %>%
  filter(play_type == "pass") %>%
  arrange(desc(pass_rate))

cat("\nPass Rate by Score Differential:\n")
print(score_tendencies)

# Visualize game script impact
ggplot(
  score_tendencies,
  aes(x = reorder(score_diff_category, pass_rate), y = pass_rate)
) +
  geom_col(aes(fill = pass_rate > 50), show.legend = FALSE) +
  # Named values keep the colors stable even if only one level is present
  scale_fill_manual(values = c("FALSE" = "darkgreen", "TRUE" = "darkblue")) +
  geom_text(
    aes(label = paste0(round(pass_rate, 1), "%")),
    hjust = -0.2,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "Pass Rate by Game Script - 2023 NFL",
    x = "Score Differential",
    y = "Pass Rate (%)",
    subtitle = "Trailing teams pass significantly more"
  ) +
  theme_minimal()

# Time-based tendencies (late game situations) ----
late_game <- plays %>%
  filter(qtr == 4, game_seconds_remaining <= 300) %>% # Final 5 minutes
  mutate(
    score_diff_category = case_when(
      score_differential >= 9 ~ "Leading 9+",
      score_differential >= 4 ~ "Leading 4-8",
      abs(score_differential) <= 3 ~ "Close Game (±3)",
      score_differential <= -4 & score_differential >= -8 ~ "Trailing 4-8",
      score_differential <= -9 ~ "Trailing 9+",
      TRUE ~ "Other"
    )
  ) %>%
  filter(score_diff_category != "Other") %>%
  group_by(score_diff_category, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(score_diff_category) %>%
  mutate(
    total = sum(plays),
    rate = plays / total * 100
  ) %>%
  ungroup() %>%
  filter(play_type == "pass")

cat("\nLate Game (Final 5 min) Pass Rate by Score:\n")
print(late_game)

# Tendency exploitation - opponent adjustment ----
# Each team's overall pass rate across the situations above, as a baseline
# for measuring deviations against specific opponents
team_base_tendencies <- run_pass_tendencies %>%
  group_by(posteam) %>%
  summarise(
    base_pass_rate = weighted.mean(pass_rate, total_plays),
    .groups = "drop"
  )

# Compare first down tendencies
first_down_only <- plays %>%
  filter(down == 1) %>%
  group_by(posteam, play_type) %>%
  summarise(plays = n(), .groups = "drop") %>%
  group_by(posteam) %>%
  mutate(pass_rate = plays / sum(plays) * 100) %>%
  ungroup() %>%
  filter(play_type == "pass") %>%
  arrange(desc(pass_rate))

cat("\nFirst Down Pass Rate (Most Pass-Happy on 1st Down):\n")
print(first_down_only %>% head(10))

cat("\nFirst Down Pass Rate (Most Run-Heavy on 1st Down):\n")
print(first_down_only %>% tail(10))
```

## Python Implementation

```python
import nfl_data_py as nfl
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import entropy

# Load play-by-play data
pbp = nfl.import_pbp_data([2023])

# Keep offensive snaps only: a run or pass, a real down, a possession team
plays = pbp[
    (pbp['play_type'].isin(['run', 'pass'])) &
    (pbp['down'].notna()) &
    (pbp['posteam'].notna())
].copy()


def categorize_situation(row):
    """Return the down-and-distance label for one play.

    Reads ``row['down']`` and ``row['ydstogo']``. Anything that is not a
    1st, 2nd or 3rd down is labelled 'Other'.
    """
    if row['down'] == 1:
        return '1st Down'
    elif row['down'] == 2 and row['ydstogo'] <= 5:
        return '2nd & Short'
    elif row['down'] == 2 and row['ydstogo'] > 5:
        return '2nd & Long'
    elif row['down'] == 3 and row['ydstogo'] <= 3:
        return '3rd & Short'
    elif row['down'] == 3 and row['ydstogo'] <= 7:
        return '3rd & Med'
    elif row['down'] == 3 and row['ydstogo'] > 7:
        return '3rd & Long'
    else:
        return 'Other'


plays['situation'] = plays.apply(categorize_situation, axis=1)
# .copy() so the columns added further down are written to an independent
# frame rather than to a filtered view
plays = plays[plays['situation'] != 'Other'].copy()

# Calculate pass rate by situation
tendencies = (
    plays.groupby(['posteam', 'situation', 'play_type'])
    .size()
    .reset_index(name='plays')
)
tendencies['total_plays'] = (
    tendencies.groupby(['posteam', 'situation'])['plays'].transform('sum')
)
tendencies['play_rate'] = tendencies['plays'] / tendencies['total_plays'] * 100

# Pass rate only
pass_tendencies = tendencies[tendencies['play_type'] == 'pass'][
    ['posteam', 'situation', 'play_rate', 'total_plays']
].rename(columns={'play_rate': 'pass_rate'})

# League averages, weighted by play count (i.e. the pooled league rate)
league_avg = pass_tendencies.groupby('situation').apply(
    lambda x: np.average(x['pass_rate'], weights=x['total_plays'])
).reset_index(name='avg_pass_rate').sort_values('avg_pass_rate')

print("League Average Pass Rate by Situation:")
print(league_avg)

# Calculate predictability score
# Use entropy as measure of unpredictability
team_predictability = []

for team in plays['posteam'].unique():
    team_plays = plays[plays['posteam'] == team]

    # Run/pass counts for every situation
    situation_counts = (
        team_plays.groupby(['situation', 'play_type'])
        .size()
        .unstack(fill_value=0)
    )

    if 'pass' in situation_counts.columns and 'run' in situation_counts.columns:
        # Binary entropy (in bits) of the run/pass split in each situation:
        # 0 = always the same call, 1 = a perfect 50-50 split.
        # scipy normalizes the counts to probabilities and treats 0 * log(0)
        # as 0.
        entropies = []
        for situation in situation_counts.index:
            counts = situation_counts.loc[situation, ['pass', 'run']].to_numpy(
                dtype=float
            )
            if counts.sum() > 0:
                entropies.append(entropy(counts, base=2))

        avg_entropy = np.mean(entropies)
        overall_pass_rate = (
            situation_counts['pass'].sum() /
            (situation_counts['pass'].sum() + situation_counts['run'].sum())
            * 100
        )

        team_predictability.append({
            'team': team,
            'entropy': avg_entropy,
            # Binary entropy is at most 1 bit, so this stays within 0-100
            'predictability_score': (1 - avg_entropy) * 100,
            'pass_rate': overall_pass_rate
        })

predictability_df = pd.DataFrame(team_predictability).sort_values(
    'entropy', ascending=False
)

print("\nMost Unpredictable Offenses (High Entropy):")
print(predictability_df.head(10))

print("\nMost Predictable Offenses (Low Entropy):")
print(predictability_df.tail(10))

# Score differential impact
# pd.cut intervals are closed on the right, so these edges give:
#   <= -17 | -16..-9 | -8..-1 | 0 | 1..8 | 9..16 | >= 17
# which is symmetric and matches the thresholds used in the R analysis.
plays['score_category'] = pd.cut(
    plays['score_differential'],
    bins=[-np.inf, -17, -9, -1, 0, 8, 16, np.inf],
    labels=['Trailing Big', 'Trailing Mod', 'Trailing Small', 'Tied',
            'Leading Small', 'Leading Mod', 'Leading Big']
)

score_tendencies = (
    plays.groupby(['score_category', 'play_type'])
    .size()
    .reset_index(name='plays')
)
score_tendencies['total_plays'] = (
    score_tendencies.groupby('score_category')['plays'].transform('sum')
)
score_tendencies['rate'] = (
    score_tendencies['plays'] / score_tendencies['total_plays'] * 100
)

pass_by_score = score_tendencies[
    score_tendencies['play_type'] == 'pass'
].sort_values('rate', ascending=False)

print("\nPass Rate by Score Differential:")
print(pass_by_score[['score_category', 'rate']])

# Formation tendencies
# Simplified personnel analysis
# NOTE(review): nflverse data appears to name this column
# 'offense_personnel' (from participation data) rather than 'personnel' --
# confirm. Whichever of the two is present is used.
personnel_col = next(
    (col for col in ('offense_personnel', 'personnel') if col in plays.columns),
    None
)
if personnel_col is None:
    print("Warning: no personnel column found; 11 personnel usage will be 0.")
    plays['is_11_personnel'] = False
else:
    plays['is_11_personnel'] = plays[personnel_col].str.contains(
        '1 RB.*3 WR', na=False, regex=True
    )

personnel_11_usage = plays.groupby('posteam').agg({
    'is_11_personnel': ['sum', 'count']
}).reset_index()

personnel_11_usage.columns = ['team', 'personnel_11_plays', 'total_plays']
personnel_11_usage['personnel_11_rate'] = (
    personnel_11_usage['personnel_11_plays'] /
    personnel_11_usage['total_plays'] * 100
)

personnel_11_usage = personnel_11_usage.sort_values(
    'personnel_11_rate', ascending=False
)

print("\nTop 10 Teams - 11 Personnel Usage Rate:")
print(personnel_11_usage.head(10))

# Visualizations
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Pass rate by situation
axes[0, 0].barh(range(len(league_avg)), league_avg['avg_pass_rate'],
                color='steelblue', alpha=0.7)
axes[0, 0].set_yticks(range(len(league_avg)))
axes[0, 0].set_yticklabels(league_avg['situation'])
axes[0, 0].set_xlabel('Pass Rate (%)')
axes[0, 0].set_title('NFL Pass Rate by Situation - 2023')
axes[0, 0].axvline(50, color='red', linestyle='--', alpha=0.5)

# 2. Team predictability
# predictability_df is sorted by entropy (descending), so these are the 15
# highest-entropy, i.e. least predictable, offenses
top_predictable = predictability_df.head(15)
axes[0, 1].scatter(top_predictable['pass_rate'], top_predictable['entropy'],
                   s=100, alpha=0.6, c=top_predictable['entropy'],
                   cmap='viridis')
axes[0, 1].set_xlabel('Overall Pass Rate (%)')
axes[0, 1].set_ylabel('Entropy (Unpredictability)')
axes[0, 1].set_title('Team Play-Calling Predictability')
axes[0, 1].grid(alpha=0.3)

# 3. Pass rate by score differential
# Colors are looked up per category because the bars are ordered by pass
# rate, not by score differential
score_colors = {
    'Trailing Big': 'red',
    'Trailing Mod': 'orange',
    'Trailing Small': 'yellow',
    'Tied': 'gray',
    'Leading Small': 'lightblue',
    'Leading Mod': 'blue',
    'Leading Big': 'darkgreen',
}
axes[1, 0].bar(range(len(pass_by_score)), pass_by_score['rate'],
               color=[score_colors[cat]
                      for cat in pass_by_score['score_category']])
axes[1, 0].set_xticks(range(len(pass_by_score)))
axes[1, 0].set_xticklabels(pass_by_score['score_category'], rotation=45,
                           ha='right')
axes[1, 0].set_ylabel('Pass Rate (%)')
axes[1, 0].set_title('Pass Rate by Game Script')
axes[1, 0].axhline(50, color='black', linestyle='--', alpha=0.5)

# 4. 11 Personnel usage
top_11 = personnel_11_usage.head(15)
axes[1, 1].barh(range(len(top_11)), top_11['personnel_11_rate'],
                color='darkgreen', alpha=0.7)
axes[1, 1].set_yticks(range(len(top_11)))
axes[1, 1].set_yticklabels(top_11['team'], fontsize=9)
axes[1, 1].set_xlabel('11 Personnel Usage Rate (%)')
axes[1, 1].set_title('Top 15 Teams - 11 Personnel Usage')
axes[1, 1].invert_yaxis()

plt.tight_layout()
plt.show()
```

## Key Insights

### Tendency Patterns

- **Third and Long**: 85%+ pass rate across league
- **Leading Late**: Run rate increases 30-40% when protecting lead
- **11 Personnel**: 60%+ usage for pass-heavy offenses
- **Score Impact**: Trailing teams pass 10-15% more than leading teams

### Exploiting Tendencies

- Predictable teams gain 0.05 fewer EPA per play
- Situation-neutral play-calling is harder to defend
- Formation diversity increases offensive efficiency
- Balanced attacks on first down set up better second downs

## Resources

- [nflfastR documentation](https://www.nflfastr.com/)
- [Sharp Football Analysis](https://www.sharpfootballanalysis.com/)

Discussion

Have questions or feedback? Join our community discussion on Discord or GitHub Discussions.