2025_mlb_pitching_app

Paused

App Files Files Community

nesticot committed on May 19, 2025

Commit

9e9f4da

verified ·

1 Parent(s): 5ed9e39

Update app.py

Browse files

Files changed (1) hide show

app.py +226 -0

app.py CHANGED Viewed

@@ -72,6 +72,19 @@ app_ui = ui.page_fluid(
                 output_tabulator("table_all")
             ),
             ui.nav("Daily Pitches",
                  ui.row(
                     ui.column(2,
@@ -606,6 +619,219 @@ def server(input, output, session):
         )
     @output
     @render_tabulator
     @reactive.event(input.refresh)

                 output_tabulator("table_all")
             ),
+            ui.nav("Compre Pitches",
+                    ui.column(2,
+                        ui.div(
+                            {"class": "input-group"},
+                            ui.span("Pitches >=", class_="input-label"),
+                            ui.input_numeric(id='pitches_all_compare_min', label='', value=1, min=1, width="100px")
+                        )
+                    )),
+                output_tabulator("table_all_compare")
+            ),
             ui.nav("Daily Pitches",
                  ui.row(
                     ui.column(2,
         )
+    @output
+    @render_tabulator
+    @reactive.event(input.refresh)  # recomputed when input.refresh fires
+    def table_all_compare():  # Per pitcher and pitch type: the most recent game compared with all earlier games, returned as a Tabulator table
+        # Step 1: Load the frame from spring_data() and keep one row per play_id
+        df_spring = spring_data().unique(subset=['play_id'])
+        # Step 2: Run fe.feature_engineering, then stuff_apply.stuff_apply, on the deduplicated rows (both helpers are defined outside this view)
+        df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))
+        # Step 3: Identify each pitcher's last game (max game_date per pitcher_id) and flag the rows played on that date
+        last_game_dates = (
+            df_spring_stuff
+            .group_by("pitcher_id")
+            .agg(pl.col("game_date").max().alias("last_game_date"))
+        )
+        df_spring_stuff = df_spring_stuff.join(last_game_dates, on="pitcher_id")
+        df_spring_stuff = df_spring_stuff.with_columns(
+            (pl.col("game_date") == pl.col("last_game_date")).alias("is_last_game")
+        )
+        # Step 4: Split last game vs all previous; a pitcher with a single game_date contributes no rows to df_prior_games
+        df_last_game = df_spring_stuff.filter(pl.col("is_last_game"))
+        df_prior_games = df_spring_stuff.filter(~pl.col("is_last_game"))
+        # Step 5: Apply feature engineering to both. NOTE(review): both frames derive from df_spring_stuff, which already went through these two calls in Step 2 - confirm the second pass is intended
+        df_last_group = stuff_apply.stuff_apply(fe.feature_engineering(df_last_game))
+        df_prior_group = stuff_apply.stuff_apply(fe.feature_engineering(df_prior_games))
+        # Step 6: Group and aggregate both frames with the same helper
+        def group_by_pitch(df):  # one row per (pitcher_id, pitcher_name, pitch_type) with averages and usage shares
+            df_pitcher_totals = df.group_by("pitcher_id").agg(
+                pl.col("start_speed").count().alias("pitcher_total")  # pitch count per pitcher, taken from start_speed
+            )
+            df_pitcher_totals_hands = (
+                df
+                .group_by(["pitcher_id", "batter_hand"])
+                .agg(pl.col("start_speed").count().alias("pitcher_total"))
+                .pivot(  # one column per batter_hand value, one row per pitcher
+                    values="pitcher_total",
+                    index="pitcher_id",
+                    columns="batter_hand",
+                    aggregate_function="sum"
+                )
+                .rename({"L": "pitcher_total_left", "R": "pitcher_total_right"})  # NOTE(review): assumes batter_hand values are exactly "L" and "R" - confirm
+                .fill_null(0)  # a hand the pitcher has no rows against becomes 0
+            )
+            df_group = df.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
+                pl.col('game_date').max().alias('last_pitched'),
+                pl.col('start_speed').count().alias('count'),
+                pl.col('start_speed').mean().alias('start_speed'),
+                pl.col('start_speed').max().alias('max_start_speed'),
+                pl.col('ivb').mean().alias('ivb'),
+                pl.col('hb').mean().alias('hb'),
+                pl.col('release_pos_z').mean().alias('release_pos_z'),
+                pl.col('release_pos_x').mean().alias('release_pos_x'),
+                pl.col('extension').mean().alias('extension'),
+                pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
+                (pl.col("batter_hand") == "R").sum().alias("rhh_count"),  # rows of this pitch type with batter_hand == "R"
+                (pl.col("batter_hand") == "L").sum().alias("lhh_count")  # rows of this pitch type with batter_hand == "L"
+            ])
+            df_group = df_group.join(df_pitcher_totals, on="pitcher_id", how="left")
+            df_group = df_group.join(df_pitcher_totals_hands, on="pitcher_id", how="left")
+            df_group = df_group.with_columns([
+                (pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent"),  # share of the pitcher's pitches that are this type
+                (pl.col("rhh_count") / pl.col("pitcher_total_right")).alias("rhh_percent"),  # same share among "R" rows; denominator is 0 when the pitcher has none
+                (pl.col("lhh_count") / pl.col("pitcher_total_left")).alias("lhh_percent")  # same share among "L" rows; denominator is 0 when the pitcher has none
+            ])
+            return df_group
+        df_last_group = group_by_pitch(df_last_group)
+        df_prior_group = group_by_pitch(df_prior_group)
+        # Step 7: Merge on pitcher_id and pitch_type; the left join keeps every last-game row and clashing prior columns get the "_prior" suffix
+        df_merge = df_last_group.join(df_prior_group, on=["pitcher_id", "pitch_type"], how="left", suffix="_prior")
+        # Step 8: Identify new pitch types: no prior row for this pitch type although the pitcher does appear in the prior data
+        df_merge = df_merge.with_columns(
+            pl.col('pitcher_id').is_in(df_prior_group['pitcher_id']).alias('exists_in_prior')
+        )
+        df_merge = df_merge.with_columns(
+            pl.when(pl.col('start_speed_prior').is_null() & pl.col('exists_in_prior'))
+            .then(pl.lit(True))
+            .otherwise(pl.lit(None))  # new_pitch is True or null, never False
+            .alias("new_pitch")
+        )
+        # Step 9: Diff columns and formatted output: "<value>\n(<signed diff>)" strings for the textarea formatter
+        cols_to_subtract = [
+            ("start_speed", "start_speed_prior"),
+            ("max_start_speed", "max_start_speed_prior"),
+            ("ivb", "ivb_prior"),
+            ("hb", "hb_prior"),
+            ("release_pos_z", "release_pos_z_prior"),
+            ("release_pos_x", "release_pos_x_prior"),
+            ("extension", "extension_prior"),
+            ("tj_stuff_plus", "tj_stuff_plus_prior")
+        ]
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(10000))  # 10000 is a sentinel for "no prior value"; NOTE(review): a real difference of exactly 10000 would be treated the same way
+            .otherwise(pl.col(new) - pl.col(old))
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract
+        ])
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(new + "_diff") == 10000)
+            .then(pl.col(new).round(1).cast(pl.Utf8) + '\n\t')  # no prior value: show the current value only
+            .otherwise(
+                pl.col(new).round(1).cast(pl.Utf8) +
+                "\n(" +
+                pl.col(new + "_diff").round(1).map_elements(lambda x: f"{x:+.1f}") +
+                ")"
+            ).alias(new + "_formatted")
+            for new, _ in cols_to_subtract
+        ])
+        cols_to_subtract_percent = [  # the usage shares get the same diff/format treatment, rendered as percentages
+            ("pitch_percent", "pitch_percent_prior"),
+            ("rhh_percent", "rhh_percent_prior"),
+            ("lhh_percent", "lhh_percent_prior")
+        ]
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(old).is_null())
+            .then(pl.lit(10000))  # same "no prior value" sentinel as above
+            .otherwise(pl.col(new) - pl.col(old))
+            .alias(new + "_diff")
+            for new, old in cols_to_subtract_percent
+        ])
+        df_merge = df_merge.with_columns([
+            pl.when(pl.col(new + "_diff") == 10000)
+            .then(  # no prior value: the parenthesised part repeats the current share with an explicit sign
+                (pl.col(new)*100).round(1).map_elements(lambda x: f"{x:.1f}%").cast(pl.Utf8) +
+                "\n(" +
+                (pl.col(new)*100).round(1).map_elements(lambda x: f"{x:+.1f}%") +
+                ")"
+            )
+            .otherwise(  # prior value present: the parenthesised part is the signed change in percentage points
+                (pl.col(new)*100).round(1).map_elements(lambda x: f"{x:.1f}%").cast(pl.Utf8) +
+                "\n(" +
+                (pl.col(new + "_diff")*100).round(1).map_elements(lambda x: f"{x:+.1f}%") +
+                ")"
+            ).alias(new + "_formatted")
+            for new, _ in cols_to_subtract_percent
+        ])
+        # df_merge = df_merge.with_columns([
+        #     (pl.col(col) * 100)  # Convert to percentage
+        #     .round(1)            # Round to 1 decimal
+        #     .map_elements(lambda x: f"{x:.1f}%")  # Format as string with '%'
+        #     .alias(col + "_formatted")
+        #     for col in percent_cols
+        # ]).sort(['pitcher_id','count'],descending=True)
+        columns = [  # Tabulator column definitions; the *_formatted fields are the two-line strings built above
+            { "title": "ID", "field": "pitcher_id", "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitcher Name", "field": "pitcher_name", "width": 225, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Team", "field": "pitcher_team", "width": 80, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Last Pitched", "field": "last_pitched", "width": 125, "headerFilter":"input" ,"frozen":True,},
+            { "title": "Pitch Type", "field": "pitch_type", "width": 100, "headerFilter":"input" ,"frozen":True,},
+            { "title": "New?", "field": "new_pitch", "width": 75, "headerFilter":"input" ,"frozen":False,},
+            { "title": "Pitches", "field": "count", "width": 100 },
+            { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100,"formatter":"textarea"},
+            { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100,"formatter":"textarea"},
+            { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100,"formatter":"textarea"},
+            { "title": "Velocity", "field": "start_speed_formatted", "width": 100,"formatter":"textarea" },
+            { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "formatter":"textarea" },
+            { "title": "iVB", "field": "ivb_formatted", "width": 100,"formatter":"textarea" },
+            { "title": "HB", "field": "hb_formatted", "width": 100,  "formatter":"textarea" },
+            { "title": "RelH", "field": "release_pos_z_formatted", "width": 100,  "formatter":"textarea" },
+            { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "formatter":"textarea" },
+            { "title": "Extension", "field": "extension_formatted", "width": 100,  "formatter":"textarea" },
+            { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100,  "formatter":"textarea" }
+        ]
+        df_merge = df_merge.filter(pl.col('count')>=int(input.pitches_all_compare_min()))  # keep rows whose last-game pitch count meets the pitches_all_compare_min input
+        df_plot = df_merge.to_pandas()
+        team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))  # pitcher_id -> pitcher_team; for a repeated id the last row in df_spring wins
+        df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)  # fills the "Team" column declared above
+        return Tabulator(
+            df_plot,
+            table_options=TableOptions(
+                height=750,
+                columns=columns,
+            )
+        )
     @output
     @render_tabulator
     @reactive.event(input.refresh)