nesticot committed on
Commit
9e9f4da
·
verified ·
1 Parent(s): 5ed9e39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -0
app.py CHANGED
@@ -72,6 +72,19 @@ app_ui = ui.page_fluid(
72
 
73
  output_tabulator("table_all")
74
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ui.nav("Daily Pitches",
76
  ui.row(
77
  ui.column(2,
@@ -606,6 +619,219 @@ def server(input, output, session):
606
  )
607
 
608
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  @output
610
  @render_tabulator
611
  @reactive.event(input.refresh)
 
72
 
73
  output_tabulator("table_all")
74
  ),
75
+ ui.nav("Compre Pitches",
76
+
77
+
78
+ ui.column(2,
79
+ ui.div(
80
+ {"class": "input-group"},
81
+ ui.span("Pitches >=", class_="input-label"),
82
+ ui.input_numeric(id='pitches_all_compare_min', label='', value=1, min=1, width="100px")
83
+ )
84
+ )),
85
+
86
+ output_tabulator("table_all_compare")
87
+ ),
88
  ui.nav("Daily Pitches",
89
  ui.row(
90
  ui.column(2,
 
619
  )
620
 
621
 
622
@output
@render_tabulator
@reactive.event(input.refresh)
def table_all_compare():
    """Render the pitch-comparison table: last outing vs. all earlier outings.

    For every pitcher, the rows of their most recent ``game_date`` are split
    from the rows of all earlier dates. Both halves are aggregated per
    ``(pitcher_id, pitcher_name, pitch_type)`` and the last-game aggregates
    are left-joined to the prior aggregates on ``pitcher_id`` and
    ``pitch_type``, so each output row is one pitch type thrown in the
    pitcher's last game.

    Display rules for the ``*_formatted`` columns:

    * Metric columns show the last-game value and, on a second line, the
      signed change versus the prior value in parentheses. When no change
      can be computed (no prior value) only the value is shown.
    * Percent columns behave the same, except that a pitch type with no
      prior row at all uses a baseline of 0, so the change equals the value.
    * A usage share whose denominator is 0 (e.g. the pitcher faced no
      left-handed batters) is null and renders as an empty cell instead of
      "nan%".

    The ``*_diff`` columns hold the numeric change, or null when it cannot
    be computed.

    Returns:
        Tabulator: the table built from the merged frame (converted to
        pandas), restricted to rows with at least the number of pitches
        entered in ``pitches_all_compare_min``.
    """
    metric_cols = [
        "start_speed",
        "max_start_speed",
        "ivb",
        "hb",
        "release_pos_z",
        "release_pos_x",
        "extension",
        "tj_stuff_plus",
    ]
    percent_cols = ["pitch_percent", "rhh_percent", "lhh_percent"]

    def safe_ratio(numerator, denominator):
        # A zero denominator means the share is undefined; return null
        # instead of letting 0/0 become NaN (shown to the user as "nan%").
        return (
            pl.when(pl.col(denominator) > 0)
            .then(pl.col(numerator) / pl.col(denominator))
            .otherwise(pl.lit(None))
        )

    def summarize_by_pitch(df):
        # Per-pitcher pitch totals, overall and by batter hand. Filtered
        # counts always produce both hand columns (0 when a hand is absent),
        # unlike pivot + rename, which fails when "L" or "R" never occurs.
        totals = df.group_by("pitcher_id").agg([
            pl.col("start_speed").count().alias("pitcher_total"),
            pl.col("start_speed")
            .filter(pl.col("batter_hand") == "L")
            .count()
            .alias("pitcher_total_left"),
            pl.col("start_speed")
            .filter(pl.col("batter_hand") == "R")
            .count()
            .alias("pitcher_total_right"),
        ])

        by_pitch = df.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
            pl.col('game_date').max().alias('last_pitched'),
            pl.col('start_speed').count().alias('count'),
            pl.col('start_speed').mean().alias('start_speed'),
            pl.col('start_speed').max().alias('max_start_speed'),
            pl.col('ivb').mean().alias('ivb'),
            pl.col('hb').mean().alias('hb'),
            pl.col('release_pos_z').mean().alias('release_pos_z'),
            pl.col('release_pos_x').mean().alias('release_pos_x'),
            pl.col('extension').mean().alias('extension'),
            pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
            (pl.col("batter_hand") == "R").sum().alias("rhh_count"),
            (pl.col("batter_hand") == "L").sum().alias("lhh_count"),
        ])

        return by_pitch.join(totals, on="pitcher_id", how="left").with_columns([
            safe_ratio("count", "pitcher_total").alias("pitch_percent"),
            safe_ratio("rhh_count", "pitcher_total_right").alias("rhh_percent"),
            safe_ratio("lhh_count", "pitcher_total_left").alias("lhh_percent"),
        ])

    def value_with_delta(value_txt, delta, delta_txt):
        # Show "<value>\n(<delta>)"; fall back to the bare value (keeping
        # the two-line cell layout) when no delta could be computed.
        return (
            pl.when(delta.is_null())
            .then(value_txt + "\n\t")
            .otherwise(value_txt + "\n(" + delta_txt + ")")
        )

    # The numeric input may be None when the field is cleared; fall back to
    # the widget's minimum instead of raising on int(None).
    min_pitches = input.pitches_all_compare_min()
    min_pitches = 1 if min_pitches is None else int(min_pitches)

    # Step 1: Load and deduplicate
    df_spring = spring_data().unique(subset=['play_id'])

    # Step 2: Feature engineer
    df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df_spring))

    # Step 3: Identify each pitcher's last game
    last_game_dates = (
        df_spring_stuff
        .group_by("pitcher_id")
        .agg(pl.col("game_date").max().alias("last_game_date"))
    )
    df_spring_stuff = (
        df_spring_stuff
        .join(last_game_dates, on="pitcher_id")
        .with_columns(
            (pl.col("game_date") == pl.col("last_game_date")).alias("is_last_game")
        )
    )

    # Step 4: Split last game vs all previous
    df_last_game = df_spring_stuff.filter(pl.col("is_last_game"))
    df_prior_games = df_spring_stuff.filter(~pl.col("is_last_game"))

    # Step 5: Apply feature engineering to both
    # NOTE(review): both halves were already engineered in Step 2; this
    # second pass is kept as in the original because the helpers are not
    # visible here - confirm whether it is required or can be dropped.
    df_last_group = stuff_apply.stuff_apply(fe.feature_engineering(df_last_game))
    df_prior_group = stuff_apply.stuff_apply(fe.feature_engineering(df_prior_games))

    # Step 6: Group and aggregate both
    df_last_group = summarize_by_pitch(df_last_group)
    df_prior_group = summarize_by_pitch(df_prior_group)

    # Step 7: Merge on pitcher_id and pitch_type
    df_merge = df_last_group.join(
        df_prior_group, on=["pitcher_id", "pitch_type"], how="left", suffix="_prior"
    )

    # The threshold only looks at the last-game count, so filter before the
    # per-row string formatting to avoid formatting rows that get dropped.
    df_merge = df_merge.filter(pl.col('count') >= min_pitches)

    # Step 8: Identify new pitch types
    # count_prior is null exactly when the left join found no prior row for
    # this pitcher/pitch type.
    has_prior = pl.col("count_prior").is_not_null()
    df_merge = df_merge.with_columns(
        pl.col('pitcher_id').is_in(df_prior_group['pitcher_id']).alias('exists_in_prior')
    )
    df_merge = df_merge.with_columns(
        # True only when the pitcher has earlier games but never threw this
        # pitch type in them; null otherwise so the cell stays empty.
        pl.when(~has_prior & pl.col('exists_in_prior'))
        .then(pl.lit(True))
        .otherwise(pl.lit(None))
        .alias("new_pitch")
    )

    # Step 9: Diff columns and formatted output
    derived = []
    for col in metric_cols:
        delta = pl.col(col) - pl.col(col + "_prior")
        value_txt = pl.col(col).round(1).cast(pl.Utf8)
        delta_txt = delta.round(1).map_elements(
            lambda x: f"{x:+.1f}", return_dtype=pl.Utf8
        )
        derived.append(delta.alias(col + "_diff"))
        derived.append(
            value_with_delta(value_txt, delta, delta_txt).alias(col + "_formatted")
        )

    for col in percent_cols:
        # A pitch type with no prior row is compared against 0% usage.
        baseline = (
            pl.when(has_prior)
            .then(pl.col(col + "_prior"))
            .otherwise(pl.lit(0.0))
        )
        delta = pl.col(col) - baseline
        value_txt = (pl.col(col) * 100).round(1).map_elements(
            lambda x: f"{x:.1f}%", return_dtype=pl.Utf8
        )
        delta_txt = (delta * 100).round(1).map_elements(
            lambda x: f"{x:+.1f}%", return_dtype=pl.Utf8
        )
        derived.append(delta.alias(col + "_diff"))
        derived.append(
            value_with_delta(value_txt, delta, delta_txt).alias(col + "_formatted")
        )

    df_merge = df_merge.with_columns(derived)

    # group_by output order is not stable; sort so each pitcher's rows stay
    # together and the table does not reshuffle on every refresh.
    df_merge = df_merge.sort(['pitcher_id', 'count'], descending=True)

    columns = [
        { "title": "ID", "field": "pitcher_id", "headerFilter":"input" ,"frozen":True,},
        { "title": "Pitcher Name", "field": "pitcher_name", "width": 225, "headerFilter":"input" ,"frozen":True,},
        { "title": "Team", "field": "pitcher_team", "width": 80, "headerFilter":"input" ,"frozen":True,},
        { "title": "Last Pitched", "field": "last_pitched", "width": 125, "headerFilter":"input" ,"frozen":True,},
        { "title": "Pitch Type", "field": "pitch_type", "width": 100, "headerFilter":"input" ,"frozen":True,},
        { "title": "New?", "field": "new_pitch", "width": 75, "headerFilter":"input" ,"frozen":False,},
        { "title": "Pitches", "field": "count", "width": 100 },
        { "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100,"formatter":"textarea"},
        { "title": "LHH%", "field": "lhh_percent_formatted", "width": 100,"formatter":"textarea"},
        { "title": "RHH%", "field": "rhh_percent_formatted", "width": 100,"formatter":"textarea"},
        { "title": "Velocity", "field": "start_speed_formatted", "width": 100,"formatter":"textarea" },
        { "title": "Max Velo", "field": "max_start_speed_formatted", "width": 100, "formatter":"textarea" },
        { "title": "iVB", "field": "ivb_formatted", "width": 100,"formatter":"textarea" },
        { "title": "HB", "field": "hb_formatted", "width": 100, "formatter":"textarea" },
        { "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "formatter":"textarea" },
        { "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "formatter":"textarea" },
        { "title": "Extension", "field": "extension_formatted", "width": 100, "formatter":"textarea" },
        { "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "formatter":"textarea" }
    ]

    df_plot = df_merge.to_pandas()

    # Team lookup from the deduplicated raw data; for a pitcher listed with
    # several teams the last row encountered wins.
    team_dict = dict(zip(df_spring['pitcher_id'], df_spring['pitcher_team']))
    df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)

    return Tabulator(
        df_plot,
        table_options=TableOptions(
            height=750,
            columns=columns,
        )
    )
833
+
834
+
835
  @output
836
  @render_tabulator
837
  @reactive.event(input.refresh)