Spaces:
Sleeping
Sleeping
| import polars as pl | |
| import datetime | |
| from shiny import App, reactive, ui | |
| from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme | |
| import api_scraper | |
| from stuff_model import feature_engineering as fe, stuff_apply | |
# Select the pytabulator "tabulator_site" theme; runs once at import time.
theme.tabulator_site()
| # ============================================================================= | |
| # CONSTANTS & CONFIGURATION | |
| # ============================================================================= | |
# Custom CSS for the loading overlay and spinner.
# The markup is injected verbatim into the page head through
# ui.head_content(ui.HTML(CUSTOM_CSS), ...) in app_ui. The overlay is hidden
# by default and becomes visible when it carries the "active" class, which
# the script in CUSTOM_JS toggles.
CUSTOM_CSS = """
<style>
/* Loading Overlay */
.loading-overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(255, 255, 255, 0.92);
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
z-index: 9999;
opacity: 0;
visibility: hidden;
transition: opacity 0.3s ease, visibility 0.3s ease;
}
.loading-overlay.active {
opacity: 1;
visibility: visible;
}
/* Spinner */
.spinner {
width: 60px;
height: 60px;
border: 5px solid #e0e0e0;
border-top: 5px solid #0d6efd;
border-radius: 50%;
animation: spin 1s linear infinite;
margin-bottom: 20px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.loading-text {
font-size: 18px;
color: #333;
font-weight: 500;
}
/* Hide content while loading initially */
.shiny-busy ~ .container-fluid .card {
opacity: 0.6;
pointer-events: none;
}
</style>
"""
# Custom JavaScript for busy state handling.
# Adds the "active" class to .loading-overlay on the 'shiny:busy' event and on
# clicks of the #refresh button, and removes it on 'shiny:idle'.
# NOTE(review): the script uses the jQuery `$` global; presumably Shiny puts
# jQuery on the page -- confirm.
CUSTOM_JS = """
<script>
$(document).ready(function() {
// Show loading overlay when Shiny is busy
$(document).on('shiny:busy', function(event) {
$('.loading-overlay').addClass('active');
});
// Hide loading overlay when Shiny is idle
$(document).on('shiny:idle', function(event) {
$('.loading-overlay').removeClass('active');
});
// Show loading on refresh button click
$(document).on('click', '#refresh', function() {
$('.loading-overlay').addClass('active');
});
});
</script>
"""
# Initialize scrapers
# NOTE: everything below runs at import time, so importing this module
# constructs the scraper and reads the baseline parquet file from the
# current working directory.
scraper = api_scraper.MLB_Scrape()
# Baseline aggregates that current data is compared against; joined on
# ('pitcher_id', 'pitch_type') by merge_and_format and the ts_* functions.
df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
# pitcher_id -> pitcher_name lookup built from the baseline data
# (for repeated ids the last row wins).
pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'], df_year_old_group['pitcher_name']))
# Column definitions for difference calculations.
# Each entry is a (current_column, baseline_column) pair; the baseline name is
# the current name plus the "_old" join suffix.
COLS_TO_SUBTRACT = [
    (metric, f"{metric}_old")
    for metric in (
        "start_speed",
        "max_start_speed",
        "ivb",
        "hb",
        "release_pos_z",
        "release_pos_x",
        "extension",
        "tj_stuff_plus",
    )
]
# Same pairing for the usage-share columns, which are formatted as percentages.
COLS_TO_SUBTRACT_PERCENT = [
    (metric, f"{metric}_old")
    for metric in ("pitch_percent", "rhh_percent", "lhh_percent")
]
# Pitch-type columns shown in the summary table; "All" is the per-pitcher
# weighted average row produced by ts_data_summ.
PITCH_TYPES = "CH CU FC FF FS SI SL ST All".split()
# Standard aggregation expressions (reusable across functions).
# Output column order matters to nothing but readability, yet it is kept:
# last_pitched, count, start_speed, max_start_speed, the plain means, then the
# per-hand pitch counts.
PITCH_AGG_EXPRS = [
    pl.col('game_date').max().alias('last_pitched'),
    # 'count' is the number of non-null start_speed values in the group.
    pl.col('start_speed').count().alias('count'),
    pl.col('start_speed').mean().alias('start_speed'),
    pl.col('start_speed').max().alias('max_start_speed'),
    *(
        pl.col(metric).mean().alias(metric)
        for metric in (
            'ivb',
            'hb',
            'release_pos_z',
            'release_pos_x',
            'extension',
            'tj_stuff_plus',
        )
    ),
    # Summing a boolean mask counts the rows where it is true.
    (pl.col("batter_hand") == "R").sum().alias("rhh_count"),
    (pl.col("batter_hand") == "L").sum().alias("lhh_count"),
]
| # ============================================================================= | |
| # COLUMN DEFINITIONS | |
| # ============================================================================= | |
def get_all_columns():
    """Return the Tabulator column definitions for the main pitch tables.

    A fresh list of fresh dicts is built on every call, so callers may modify
    the result freely.
    """
    def _filterable(title, field, width, frozen=True):
        # Identity-style column with a text header filter.
        return {"title": title, "field": field, "width": width,
                "headerFilter": "input", "frozen": frozen}

    def _formatted(title, stem, width=85):
        # Metric column backed by the pre-rendered "<stem>_formatted" text.
        return {"title": title, "field": f"{stem}_formatted", "width": width,
                "formatter": "textarea"}

    identity = [
        _filterable("ID", "pitcher_id", 80),
        _filterable("Pitcher Name", "pitcher_name", 200),
        _filterable("Team", "pitcher_team", 70),
        _filterable("Last Pitched", "last_pitched", 110),
        _filterable("Pitch Type", "pitch_type", 90),
        _filterable("New?", "new_pitch", 60, frozen=False),
    ]
    pitches = [{"title": "Pitches", "field": "count", "width": 75}]
    metrics = [
        _formatted("Pitch%", "pitch_percent"),
        _formatted("LHH%", "lhh_percent"),
        _formatted("RHH%", "rhh_percent"),
        _formatted("Velocity", "start_speed"),
        _formatted("Max Velo", "max_start_speed"),
        _formatted("iVB", "ivb"),
        _formatted("HB", "hb"),
        _formatted("RelH", "release_pos_z"),
        _formatted("RelS", "release_pos_x"),
        _formatted("Extension", "extension", 90),
        _formatted("tjStuff+", "tj_stuff_plus", 90),
    ]
    return identity + pitches + metrics
def get_daily_columns():
    """Return the daily-table columns: the main set with slot 3 swapped for 'Date'.

    Slot 3 of get_all_columns() is the 'Last Pitched' column.
    """
    date_column = {
        "title": "Date",
        "field": "game_date",
        "width": 100,
        "headerFilter": "input",
        "frozen": True,
    }
    base = get_all_columns()
    return base[:3] + [date_column] + base[4:]
def get_tjstuff_columns():
    """Return the main columns followed by the 2024 tjStuff+ value and its delta."""
    columns = get_all_columns()
    columns.extend([
        {"title": "2024 tjStuff+", "field": "tj_stuff_plus_old", "width": 100, "formatter": "textarea"},
        {"title": "Δ", "field": "tj_stuff_plus_diff", "width": 70, "formatter": "textarea"},
    ])
    return columns
def get_summary_columns():
    """Return the summary-table columns: pitcher identity, pitch count, then one column per pitch type."""
    leading = [
        {"title": "ID", "field": "pitcher_id", "width": 80, "headerFilter": "input", "frozen": True},
        {"title": "Pitcher Name", "field": "pitcher_name", "width": 200, "headerFilter": "input", "frozen": True},
        {"title": "Team", "field": "pitcher_team", "width": 70, "headerFilter": "input", "frozen": True},
        {"title": "Pitches", "field": "count", "width": 80, "headerFilter": "input"},
    ]
    # Each pitch type is both the header title and the data field name.
    for pitch_type in PITCH_TYPES:
        leading.append(
            {"title": pitch_type, "field": pitch_type, "width": 70, "formatter": "textarea"}
        )
    return leading
def get_team_columns():
    """Return the three column definitions used by the team-level tjStuff+ table."""
    team = {"title": "Team", "field": "pitcher_team", "width": 150, "headerFilter": "input", "frozen": True}
    pitches = {"title": "Pitches", "field": "count", "width": 120, "headerFilter": "input"}
    stuff = {"title": "tjStuff+", "field": "tj_stuff_plus", "width": 100, "formatter": "textarea"}
    return [team, pitches, stuff]
| # ============================================================================= | |
| # DATA PROCESSING HELPERS | |
| # ============================================================================= | |
def compute_pitcher_totals(df, group_cols=None):
    """Count rows per group and return them as a 'pitcher_total' column.

    Groups by `group_cols`, falling back to ["pitcher_id"] when it is None or empty.
    """
    keys = group_cols if group_cols else ["pitcher_id"]
    row_count = pl.len().alias("pitcher_total")
    return df.group_by(keys).agg(row_count)
def compute_hand_totals(df, group_cols=None):
    """Count rows per group, split by batter hand.

    Args:
        df: polars DataFrame with a 'batter_hand' column plus the group columns.
        group_cols: grouping keys; defaults to ["pitcher_id"] when None or empty.

    Returns:
        One row per group with 'pitcher_total_left' and 'pitcher_total_right'
        columns (0 where a group has no rows for that hand).
    """
    group_cols = group_cols or ["pitcher_id"]
    renamed = {"L": "pitcher_total_left", "R": "pitcher_total_right"}
    totals = (
        df.group_by(group_cols + ["batter_hand"])
        .agg(pl.len().alias("pitcher_total"))
        .pivot(values="pitcher_total", index=group_cols, on="batter_hand", aggregate_function="sum")
    )
    # The pivot only creates columns for hands present in the data. Add any
    # missing one as zeros so the rename below cannot fail on an absent column
    # (e.g. an empty frame, or a slice where only one hand occurs).
    absent = [hand for hand in renamed if hand not in totals.columns]
    if absent:
        totals = totals.with_columns(
            [pl.lit(0, dtype=pl.UInt32).alias(hand) for hand in absent]
        )
    return totals.rename(renamed).fill_null(0)
def aggregate_pitch_data(df, group_cols):
    """Group `df` by `group_cols` and apply the shared PITCH_AGG_EXPRS aggregations."""
    grouped = df.group_by(group_cols)
    return grouped.agg(PITCH_AGG_EXPRS)
def add_pitch_percentages(df):
    """Add 'pitch_percent', 'rhh_percent' and 'lhh_percent' share columns.

    Each share is a count divided by the matching total:
    count / pitcher_total, rhh_count / pitcher_total_right and
    lhh_count / pitcher_total_left.

    A share is null when its total is 0 or null. compute_hand_totals fills
    missing hands with 0, so a pitcher who faced no batters of one hand would
    otherwise produce 0/0 = NaN, which the formatters render as "NaN%".
    """
    def _share(count_col, total_col, name):
        total = pl.col(total_col)
        return (
            pl.when(total > 0)
            .then(pl.col(count_col) / total)
            .otherwise(None)
            .alias(name)
        )

    return df.with_columns([
        _share("count", "pitcher_total", "pitch_percent"),
        _share("rhh_count", "pitcher_total_right", "rhh_percent"),
        _share("lhh_count", "pitcher_total_left", "lhh_percent"),
    ])
def mark_new_pitches(df, old_df, speed_col="start_speed_old"):
    """Add a 'new_pitch' column flagging pitches absent from the comparison data.

    A row is flagged True when `speed_col` is null (the join found no matching
    comparison row) while the pitcher's id does appear in `old_df`. Every other
    row gets null rather than False.
    """
    known_pitchers = old_df['pitcher_id']
    lacks_comparison = pl.col(speed_col).is_null()
    pitcher_is_known = pl.col('pitcher_id').is_in(known_pitchers)
    flag = (
        pl.when(lacks_comparison & pitcher_is_known)
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
    )
    return df.with_columns(flag.alias("new_pitch"))
def format_diff_value(new_col: str, diff_col: str) -> pl.Expr:
    """Build a text expression: the value on one line, its signed diff in parentheses below.

    Both numbers are rounded to one decimal. A diff equal to the sentinel
    10000 (written by format_diff_columns when there is no comparison value)
    yields the value followed by "\\n\\t" instead of a bracketed diff.
    """
    diff = pl.col(diff_col)
    value_text = pl.col(new_col).round(1).cast(pl.Utf8)
    diff_text = diff.round(1).cast(pl.Utf8)
    # Non-negative diffs get an explicit "+"; negatives already carry "-".
    signed_diff = (
        pl.when(diff >= 0)
        .then(pl.lit("+") + diff_text)
        .otherwise(diff_text)
    )
    without_diff = value_text + "\n\t"
    with_diff = value_text + "\n(" + signed_diff + ")"
    return pl.when(diff.eq(10000)).then(without_diff).otherwise(with_diff)
def format_percent_diff_value(new_col: str, diff_col: str) -> pl.Expr:
    """Build a text expression: a percentage with its signed change in parentheses below.

    Fractions are multiplied by 100 and rounded to one decimal. When the diff
    equals the sentinel 10000 (no comparison value), the bracket shows the
    current percentage itself instead of a difference.
    """
    raw_diff = pl.col(diff_col)
    current_pct = (pl.col(new_col) * 100).round(1)
    change_pct = (raw_diff * 100).round(1)
    # Number shown inside the parentheses: the value itself for the sentinel
    # case, the scaled diff otherwise.
    shown = pl.when(raw_diff.eq(10000)).then(current_pct).otherwise(change_pct)
    sign = pl.when(shown >= 0).then(pl.lit("+")).otherwise(pl.lit(""))
    return current_pct.cast(pl.Utf8) + "%\n(" + sign + shown.cast(pl.Utf8) + "%)"
def format_diff_columns(df, cols_to_subtract):
    """Add '<new>_diff' and '<new>_formatted' columns for each (new, old) pair.

    The diff is new - old, or the sentinel 10000.0 when the old value is null.
    The formatted column is the text built by format_diff_value.
    """
    diff_exprs = []
    text_exprs = []
    for current, baseline in cols_to_subtract:
        diff_name = current + "_diff"
        diff_exprs.append(
            pl.when(pl.col(baseline).is_null())
            .then(pl.lit(10000.0))
            .otherwise(pl.col(current) - pl.col(baseline))
            .alias(diff_name)
        )
        text_exprs.append(
            format_diff_value(current, diff_name).alias(current + "_formatted")
        )
    # Two passes: the text expressions read the diff columns created first.
    with_diffs = df.with_columns(diff_exprs)
    return with_diffs.with_columns(text_exprs)
def format_percent_diff_columns(df, cols_to_subtract):
    """Add '<new>_diff' and percent-formatted '<new>_formatted' columns for each (new, old) pair.

    The diff is new - old, or the sentinel 10000.0 when the old value is null.
    The formatted column is the text built by format_percent_diff_value.
    """
    diff_exprs = []
    text_exprs = []
    for current, baseline in cols_to_subtract:
        diff_name = current + "_diff"
        diff_exprs.append(
            pl.when(pl.col(baseline).is_null())
            .then(pl.lit(10000.0))
            .otherwise(pl.col(current) - pl.col(baseline))
            .alias(diff_name)
        )
        text_exprs.append(
            format_percent_diff_value(current, diff_name).alias(current + "_formatted")
        )
    # Two passes: the text expressions read the diff columns created first.
    with_diffs = df.with_columns(diff_exprs)
    return with_diffs.with_columns(text_exprs)
def add_team_column(df, spring_df):
    """Return a copy of `df` with a 'pitcher_team' column looked up by pitcher_id.

    Args:
        df: pandas DataFrame with a 'pitcher_id' column. It is not modified.
        spring_df: frame providing 'pitcher_id' and 'pitcher_team' columns.

    Returns:
        A new DataFrame with 'pitcher_team' added (or replaced). Ids missing
        from `spring_df` map to NaN.
    """
    # When an id repeats, later rows overwrite earlier ones, so the team comes
    # from the LAST row for that pitcher in spring_df.
    # NOTE(review): confirm that is the intended row for pitchers who changed teams.
    team_by_pitcher = dict(zip(spring_df['pitcher_id'], spring_df['pitcher_team']))
    # assign() builds a new frame instead of writing into the caller's object.
    return df.assign(pitcher_team=df['pitcher_id'].map(team_by_pitcher))
def process_and_aggregate(df_stuff, group_cols, join_cols=None):
    """Aggregate pitches by `group_cols`, attach totals by `join_cols`, and add usage shares.

    `join_cols` (default ["pitcher_id"]) sets the level at which overall and
    per-hand totals are computed and joined back onto the aggregated rows.
    """
    keys = join_cols or ["pitcher_id"]
    overall_totals = compute_pitcher_totals(df_stuff, keys)
    hand_totals = compute_hand_totals(df_stuff, keys)
    aggregated = aggregate_pitch_data(df_stuff, group_cols)
    for totals in (overall_totals, hand_totals):
        aggregated = aggregated.join(totals, on=keys, how="left")
    return add_pitch_percentages(aggregated)
def merge_and_format(df_group, old_df, cols_sub=None, cols_pct=None, suffix="_old"):
    """Left-join comparison data onto `df_group`, flag new pitches, and build display columns.

    The join is on ('pitcher_id', 'pitch_type'); overlapping columns from
    `old_df` receive `suffix`. `cols_sub` and `cols_pct` default to
    COLS_TO_SUBTRACT and COLS_TO_SUBTRACT_PERCENT when None or empty.
    """
    value_pairs = cols_sub if cols_sub else COLS_TO_SUBTRACT
    percent_pairs = cols_pct if cols_pct else COLS_TO_SUBTRACT_PERCENT
    merged = df_group.join(
        old_df,
        on=['pitcher_id', 'pitch_type'],
        how='left',
        suffix=suffix,
    )
    merged = mark_new_pitches(merged, old_df, f"start_speed{suffix}")
    merged = format_diff_columns(merged, value_pairs)
    return format_percent_diff_columns(merged, percent_pairs)
| # ============================================================================= | |
| # UI DEFINITION | |
| # ============================================================================= | |
def _min_pitches_column(input_id):
    """Two-wide column holding the 'Pitches >=' numeric filter for one tab."""
    return ui.column(
        2,
        ui.div(
            {"class": "input-group"},
            ui.span("Pitches >=", class_="input-label"),
            ui.input_numeric(id=input_id, label='', value=1, min=1, width="100px"),
        ),
    )


def _download_column(button_id):
    """One-wide column holding a 'Download Data' button."""
    return ui.column(1, ui.download_button(button_id, "Download Data", class_="btn-sm mb-3"))


# Page layout: head injections, the loading overlay, then a single card with
# the intro/credits row and one tab per table.
app_ui = ui.page_fluid(
    # Inject custom CSS and JS
    ui.head_content(ui.HTML(CUSTOM_CSS), ui.HTML(CUSTOM_JS)),
    # Loading overlay (shown/hidden by the script in CUSTOM_JS)
    ui.div(
        ui.div(class_="spinner"),
        ui.div("Loading data...", class_="loading-text"),
        class_="loading-overlay",
        id="loading-overlay",
    ),
    ui.card(
        ui.card_header("2025 MLB Pitch Data App"),
        ui.row(
            ui.column(
                4,
                ui.markdown("""This app generates a table which shows the 2025 MLB Pitch data.
* Differences are calculated based on 2024 regular season data
* If 2024 data does not exist for pitcher, 2023 Data is used
* If no difference exists, the pitch is labelled as a new pitch"""),
                ui.input_action_button("refresh", "Refresh Data", class_="btn-primary", width="100%"),
            ),
            ui.column(
                3,
                ui.div("By: ", ui.tags.a("@TJStats", href="https://x.com/TJStats", target="_blank")),
                ui.tags.p("Data: MLB"),
                ui.tags.p(
                    ui.tags.a(
                        "Support me on Patreon for more baseball content",
                        href="https://www.patreon.com/TJ_Stats",
                        target="_blank",
                    )
                ),
            ),
        ),
        ui.navset_tab(
            ui.nav_panel(
                "All Pitches",
                ui.row(
                    _download_column("download_all"),
                    _min_pitches_column('pitches_all_min'),
                ),
                output_tabulator("table_all"),
            ),
            ui.nav_panel(
                "Last Game to Season",
                ui.row(_min_pitches_column('pitches_all_compare_min')),
                output_tabulator("table_all_compare"),
            ),
            ui.nav_panel(
                "Daily Pitches",
                ui.row(
                    _download_column("download_daily"),
                    _min_pitches_column('pitches_daily_min'),
                ),
                output_tabulator("table_daily"),
            ),
            ui.nav_panel(
                "tjStuff+",
                ui.row(_min_pitches_column('pitches_tjstuff_min')),
                output_tabulator("table_tjstuff"),
            ),
            ui.nav_panel(
                "tjStuff+ Summary",
                ui.row(
                    _download_column("download_tjsumm"),
                    _min_pitches_column('pitches_tjsumm_min'),
                ),
                output_tabulator("table_stuff_all"),
            ),
            ui.nav_panel(
                "tjStuff+ Team",
                ui.row(ui.column(2)),
                output_tabulator("table_tjstuff_team"),
            ),
        ),
    ),
)
| def server(input, output, session): | |
| # ========================================================================= | |
| # CORE DATA LOADING (Cached) | |
| # ========================================================================= | |
| def spring_data(): | |
| """Load raw pitch data from parquet and fetch today's games.""" | |
| df_spring = pl.read_parquet( | |
| "https://huggingface.co/datasets/TJStatsApps/mlb_data/resolve/main/data/mlb_pitch_data_2025.parquet" | |
| ) | |
| date = (datetime.datetime.now() - datetime.timedelta(hours=8)).date() | |
| game_list_input = ( | |
| scraper.get_schedule(year_input=[date.year], sport_id=[1], game_type=['R']) | |
| .filter(pl.col('date') == date)['game_id'] | |
| ) | |
| data = scraper.get_data(game_list_input) | |
| df = scraper.get_data_df(data) | |
| df_spring = pl.concat([df_spring, df]).unique(subset=['play_id']).sort('game_date', descending=True) | |
| return df_spring.filter(pl.col('start_speed') > 0) | |
| def stuff_data(): | |
| """Apply feature engineering and stuff model - cached to avoid recomputation.""" | |
| df_spring = spring_data().unique(subset=['play_id']) | |
| return stuff_apply.stuff_apply(fe.feature_engineering(df_spring)) | |
| # ========================================================================= | |
| # PRE-COMPUTED AGGREGATIONS (Cached) | |
| # ========================================================================= | |
| def ts_data(): | |
| """Season-level aggregation for download.""" | |
| df_stuff = stuff_data() | |
| df_group = process_and_aggregate(df_stuff, ['pitcher_id', 'pitcher_name', 'pitch_type']) | |
| df_merge = df_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old') | |
| df_merge = mark_new_pitches(df_merge, df_year_old_group) | |
| return df_merge.select([ | |
| 'pitcher_id', 'pitcher_name', 'pitch_type', 'count', 'pitch_percent', | |
| 'rhh_percent', 'lhh_percent', 'start_speed', 'max_start_speed', 'ivb', | |
| 'hb', 'release_pos_z', 'release_pos_x', 'extension', 'tj_stuff_plus' | |
| ]) | |
| def ts_data_daily(): | |
| """Daily-level aggregation for download.""" | |
| df_stuff = stuff_data() | |
| join_cols = ["pitcher_id", 'game_id'] | |
| group_cols = ['pitcher_id', 'pitcher_name', 'pitch_type', 'game_id', 'game_date'] | |
| df_group = process_and_aggregate(df_stuff, group_cols, join_cols) | |
| df_merge = df_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old') | |
| df_merge = mark_new_pitches(df_merge, df_year_old_group) | |
| return df_merge.select([ | |
| 'pitcher_id', 'pitcher_name', 'game_date', 'pitch_type', 'count', 'pitch_percent', | |
| 'rhh_percent', 'lhh_percent', 'start_speed', 'max_start_speed', 'ivb', | |
| 'hb', 'release_pos_z', 'release_pos_x', 'extension', 'tj_stuff_plus' | |
| ]) | |
| def ts_data_summ(): | |
| """tjStuff+ summary pivot table.""" | |
| df_stuff = stuff_data() | |
| df_agg = df_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg( | |
| pl.len().alias('count'), | |
| pl.col('tj_stuff_plus').mean() | |
| ) | |
| # Calculate weighted average for "All" pitch type | |
| df_weighted_avg = ( | |
| df_agg.with_columns((pl.col('tj_stuff_plus') * pl.col('count')).alias('weighted')) | |
| .group_by(['pitcher_id', 'pitcher_name']) | |
| .agg( | |
| pl.col('count').sum().alias('total_count'), | |
| pl.col('weighted').sum().alias('total_weighted') | |
| ) | |
| .with_columns( | |
| (pl.col('total_weighted') / pl.col('total_count')).alias('tj_stuff_plus'), | |
| pl.lit("All").alias('pitch_type') | |
| ) | |
| .select(['pitcher_id', 'pitcher_name', 'pitch_type', | |
| pl.col('total_count').alias('count'), 'tj_stuff_plus']) | |
| ) | |
| df_small = pl.concat([df_agg, df_weighted_avg]) | |
| count_dict = dict(zip( | |
| df_small.filter(pl.col('pitch_type') == 'All')['pitcher_id'], | |
| df_small.filter(pl.col('pitch_type') == 'All')['count'] | |
| )) | |
| df_pivot = ( | |
| df_small.pivot(index=['pitcher_id', 'pitcher_name'], on='pitch_type', values='tj_stuff_plus') | |
| .with_columns(pl.col("pitcher_id").replace_strict(count_dict, default=None).alias("count")) | |
| ) | |
| # Ensure all pitch type columns exist | |
| missing_cols = [col for col in PITCH_TYPES if col not in df_pivot.columns] | |
| if missing_cols: | |
| df_pivot = df_pivot.with_columns([pl.lit(None).alias(col) for col in missing_cols]) | |
| return df_pivot | |
| # ========================================================================= | |
| # DOWNLOADS | |
| # ========================================================================= | |
| def download_all(): | |
| yield ts_data().write_csv() | |
| def download_daily(): | |
| yield ts_data_daily().write_csv() | |
| def download_tjsumm(): | |
| yield ts_data_summ().write_csv() | |
| # ========================================================================= | |
| # TABLE RENDERERS | |
| # ========================================================================= | |
| def table_all(): | |
| df_stuff = stuff_data() | |
| df_group = process_and_aggregate(df_stuff, ['pitcher_id', 'pitcher_name', 'pitch_type']) | |
| df_merge = merge_and_format(df_group, df_year_old_group) | |
| df_merge = df_merge.filter(pl.col('count') >= int(input.pitches_all_min())) | |
| df_plot = add_team_column(df_merge.to_pandas(), spring_data()) | |
| return Tabulator(df_plot, table_options=TableOptions(height=750, columns=get_all_columns())) | |
| def table_all_compare(): | |
| """Compare last game to season data.""" | |
| df_stuff = stuff_data() | |
| # Split into last game vs prior games | |
| last_game_dates = df_stuff.group_by("pitcher_id").agg( | |
| pl.col("game_date").max().alias("last_game_date") | |
| ) | |
| df_stuff = df_stuff.join(last_game_dates, on="pitcher_id") | |
| df_last = df_stuff.filter(pl.col("game_date") == pl.col("last_game_date")) | |
| df_prior = df_stuff.filter(pl.col("game_date") != pl.col("last_game_date")) | |
| # Aggregate both datasets using shared function | |
| df_last_group = process_and_aggregate(df_last, ['pitcher_id', 'pitcher_name', 'pitch_type']) | |
| df_prior_group = process_and_aggregate(df_prior, ['pitcher_id', 'pitcher_name', 'pitch_type']) | |
| # Merge and format with prior suffix | |
| df_merge = df_last_group.join(df_prior_group, on=["pitcher_id", "pitch_type"], how="left", suffix="_prior") | |
| cols_prior = [(n, n.replace("_old", "_prior")) for n, _ in COLS_TO_SUBTRACT] | |
| cols_percent_prior = [(n, n.replace("_old", "_prior")) for n, _ in COLS_TO_SUBTRACT_PERCENT] | |
| df_merge = mark_new_pitches(df_merge, df_prior_group, "start_speed_prior") | |
| df_merge = format_diff_columns(df_merge, cols_prior) | |
| df_merge = format_percent_diff_columns(df_merge, cols_percent_prior) | |
| df_merge = df_merge.filter(pl.col('count') >= int(input.pitches_all_compare_min())) | |
| df_plot = add_team_column(df_merge.to_pandas(), spring_data()) | |
| return Tabulator(df_plot, table_options=TableOptions(height=750, columns=get_all_columns())) | |
| def table_daily(): | |
| """Daily breakdown by game.""" | |
| df_stuff = stuff_data() | |
| join_cols = ["pitcher_id", 'game_id'] | |
| group_cols = ['pitcher_id', 'pitcher_name', 'pitch_type', 'game_id', 'game_date'] | |
| df_group = process_and_aggregate(df_stuff, group_cols, join_cols) | |
| df_merge = merge_and_format(df_group, df_year_old_group) | |
| df_merge = df_merge.filter(pl.col('count') >= int(input.pitches_daily_min())) | |
| df_plot = add_team_column(df_merge.to_pandas(), spring_data()) | |
| return Tabulator(df_plot, table_options=TableOptions(height=750, columns=get_daily_columns())) | |
| def table_tjstuff(): | |
| """tjStuff+ table with 2024 comparison.""" | |
| df_stuff = stuff_data() | |
| df_group = process_and_aggregate(df_stuff, ['pitcher_id', 'pitcher_name', 'pitch_type']) | |
| df_merge = df_group.join(df_year_old_group, on=['pitcher_id', 'pitch_type'], how='left', suffix='_old') | |
| df_merge = mark_new_pitches(df_merge, df_year_old_group) | |
| # Simple formatting without diff brackets (uses native Polars) | |
| df_merge = df_merge.with_columns([ | |
| pl.when(pl.col(old).is_null()) | |
| .then(pl.lit(None)) | |
| .otherwise(pl.col(new) - pl.col(old)) | |
| .alias(new + "_diff") | |
| for new, old in COLS_TO_SUBTRACT | |
| ]) | |
| df_merge = df_merge.with_columns([ | |
| pl.col(new).round(1).cast(pl.Utf8).alias(new + "_formatted") | |
| for new, _ in COLS_TO_SUBTRACT | |
| ]) | |
| # Format tjStuff+ old and diff using native Polars (no map_elements) | |
| df_merge = df_merge.with_columns([ | |
| pl.col("tj_stuff_plus_old").round(1).cast(pl.Utf8).alias("tj_stuff_plus_old"), | |
| pl.when(pl.col("tj_stuff_plus_diff") >= 0) | |
| .then(pl.lit("+") + pl.col("tj_stuff_plus_diff").round(1).cast(pl.Utf8)) | |
| .otherwise(pl.col("tj_stuff_plus_diff").round(1).cast(pl.Utf8)) | |
| .alias("tj_stuff_plus_diff") | |
| ]) | |
| # Format percent columns using native Polars | |
| df_merge = df_merge.with_columns([ | |
| ((pl.col(col) * 100).round(1).cast(pl.Utf8) + "%").alias(col + "_formatted") | |
| for col in ['pitch_percent', 'rhh_percent', 'lhh_percent'] | |
| ]).sort(['pitcher_id', 'count'], descending=True) | |
| df_merge = df_merge.filter(pl.col('count') >= int(input.pitches_tjstuff_min())) | |
| df_plot = add_team_column(df_merge.to_pandas(), spring_data()) | |
| return Tabulator(df_plot, table_options=TableOptions(height=750, columns=get_tjstuff_columns())) | |
| def table_stuff_all(): | |
| """tjStuff+ summary pivot table.""" | |
| df_pivot = ts_data_summ() | |
| df_pivot = df_pivot.with_columns([ | |
| pl.col(col).round(0).alias(col) for col in PITCH_TYPES | |
| ]) | |
| df_pivot = df_pivot.filter(pl.col('count') >= int(input.pitches_tjsumm_min())) | |
| df_plot = add_team_column(df_pivot.sort(['pitcher_id', 'count'], descending=True).to_pandas(), spring_data()) | |
| return Tabulator(df_plot, table_options=TableOptions(height=750, columns=get_summary_columns())) | |
| def table_tjstuff_team(): | |
| """tjStuff+ by team.""" | |
| df_stuff = stuff_data() | |
| df_team = df_stuff.group_by(['pitcher_team']).agg([ | |
| pl.len().alias('count'), | |
| pl.col('tj_stuff_plus').mean().round(0).alias('tj_stuff_plus'), | |
| ]).sort(['tj_stuff_plus'], descending=True) | |
| return Tabulator(df_team.to_pandas(), table_options=TableOptions(height=750, columns=get_team_columns())) | |
# Shiny application object combining the UI definition with the server function.
app = App(app_ui, server)