Spaces:

Multichem-PD
/

MLB_Season_Long

Running

File size: 17,509 Bytes

import streamlit as st

# Configure the wide page layout before any other Streamlit element is created.
st.set_page_config(layout="wide")

import numpy as np
import pandas as pd
import streamlit as st
import os
from database import init_conn

# Client handle returned by the project's init_conn(); it is used further down
# to open the master spreadsheet by URL.
gcservice_account = init_conn()

# URL of the master spreadsheet, read from the environment (None when unset).
master_hold = os.environ.get('MASTER_HOLD')

# Styler format map: the three finish-probability columns render as percentages.
sim_format = dict.fromkeys(('Top_finish', 'Top_5_finish', 'Top_10_finish'), '{:.2%}')

# Custom CSS injected into the page: gold styling for the segmented-control
# buttons (normal, active and hover states) and for select widgets.
_PAGE_CSS = """
<style>
    /* Tab styling */
    .stElementContainer [data-baseweb="button-group"] {
        gap: 2.000rem;
        padding: 4px;
    }
    .stElementContainer [kind="segmented_control"] {
        height: 2.000rem;
        white-space: pre-wrap;
        background-color: #DAA520;
        color: white;
        border-radius: 20px;
        gap: 1px;
        padding: 10px 20px;
        font-weight: bold;
        transition: all 0.3s ease;
    }
    .stElementContainer [kind="segmented_controlActive"] {
        height: 3.000rem;
        background-color: #DAA520;
        border: 3px solid #FFD700;
        border-radius: 10px;
        color: black;
    }
    .stElementContainer [kind="segmented_control"]:hover {
        background-color: #FFD700;
        cursor: pointer;
    }

    div[data-baseweb="select"] > div {
        background-color: #DAA520;
        color: white;
    }

</style>"""

st.markdown(_PAGE_CSS, unsafe_allow_html=True)

def _read_sheet(spreadsheet, tab_name, blanks_to_nan=True):
    """Load one worksheet into a DataFrame and drop rows with missing values.

    Args:
        spreadsheet: Opened spreadsheet object exposing ``worksheet(name)``.
        tab_name: Name of the worksheet tab to read.
        blanks_to_nan: When True, empty-string cells are converted to NaN
            first so that ``dropna`` also removes rows containing blanks.

    Returns:
        DataFrame of the sheet's records with incomplete rows removed.
    """
    frame = pd.DataFrame(spreadsheet.worksheet(tab_name).get_all_records())
    if blanks_to_nan:
        frame = frame.replace("", np.nan)
    return frame.dropna()


# Cached as *data* (not as a resource) so that the "Reset Data" buttons, which
# call st.cache_data.clear(), actually invalidate these frames.
@st.cache_data(ttl = 600)
def init_baselines():
    """Read the pitcher, hitter and team-win projection sheets.

    The master spreadsheet is opened once and each tab is read from it.
    Results are cached for 600 seconds.

    Returns:
        Tuple ``(pitcher_proj, hitter_proj, wins_proj)`` of DataFrames.
    """
    spreadsheet = gcservice_account.open_by_url(master_hold)

    pitcher_proj = _read_sheet(spreadsheet, 'Pitcher_Proj')
    hitter_proj = _read_sheet(spreadsheet, 'Hitter_Proj')
    # NOTE(review): the 'Display' sheet has never had blank cells converted to
    # NaN before dropna, so rows with blanks are kept. Preserved as-is; confirm
    # whether that is intentional.
    wins_proj = _read_sheet(spreadsheet, 'Display', blanks_to_nan=False)

    return pitcher_proj, hitter_proj, wins_proj

def convert_df_to_csv(df):
    """Serialize ``df`` to CSV text (index column included) as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')

pitcher_proj, hitter_proj, wins_proj = init_baselines()
# The 'Team' column repeats a team for every pitcher row; keep one entry per
# team (first-seen order) so the team multiselects are not flooded with
# duplicate options. Filtering with isin() is unaffected by the de-duplication.
total_teams = pitcher_proj['Team'].drop_duplicates().tolist()

# Top-level navigation: exactly one view is rendered per script run.
selected_tab = st.segmented_control(
    "Select Tab",
    options=["Team Win Projections", "Pitcher Projections", "Hitter Projections", "Pitcher Simulations", "Hitter Simulations"],
    selection_mode='single',
    default='Team Win Projections',
    width='stretch',
    label_visibility='collapsed',
    key='tab_selector'
)

# Widget key of the "Reset Data" button shown at the top of each tab.
RESET_KEYS = {
    'Team Win Projections': 'reset1',
    'Pitcher Projections': 'reset2',
    'Hitter Projections': 'reset3',
    'Pitcher Simulations': 'reset4',
    'Hitter Simulations': 'reset5',
}

TEAM_COLUMNS = ['Team', '2B', 'HR', 'SB', 'P_SO', 'P_H', 'P_R', 'P_HR', 'P_BB', 'LY Added', 'Added',
                'LY Adj Wins', 'Adj Wins', 'Vegas', 'Proj wins', 'Diff']

PITCHER_EXPORT_COLUMNS = ['Name', 'Team', 'TBF', 'Ceiling_var', 'True_AVG', 'Hits', 'Singles%', 'Singles',
                          'Doubles%', 'Doubles', 'xHR%', 'Homeruns', 'Strikeout%', 'Strikeouts',
                          'Walk%', 'Walks', 'Runs%', 'Runs', 'ERA', 'Wins', 'Quality_starts',
                          'ADP', 'UD_fpts', 'DK_fpts']
PITCHER_DISPLAY_COLUMNS = ['Name', 'Team', 'TBF', 'True_AVG', 'Hits', 'Singles', 'Doubles', 'Homeruns',
                           'Strikeouts', 'Walks', 'Runs', 'ERA', 'Wins', 'Quality_starts',
                           'ADP', 'UD_fpts', 'DK_fpts']
# Pitcher columns shaded with the non-reversed colormap; the rest use the reversed one.
PITCHER_HIGH_IS_GOOD = ['TBF', 'Strikeouts', 'Wins', 'Quality_starts', 'UD_fpts', 'DK_fpts']

HITTER_EXPORT_COLUMNS = ['Name', 'Team', 'PA', 'Ceiling_var', 'Walk%', 'Walks', 'xHits', 'Singles%', 'Singles',
                         'Doubles%', 'Doubles', 'xHR%', 'Homeruns', 'Runs%', 'Runs', 'RBI%', 'RBI',
                         'Steal%', 'Stolen_bases', 'ADP', 'UD_fpts', 'DK_fpts']
HITTER_DISPLAY_COLUMNS = ['Name', 'Team', 'PA', 'Walks', 'xHits', 'Singles', 'Doubles',
                          'Homeruns', 'Runs', 'RBI', 'Stolen_bases', 'ADP', 'UD_fpts', 'DK_fpts']

# Selectbox label -> (projection column, hard cap applied to the ceiling).
PITCHER_PROPS = {
    'Strikeouts': ('Strikeouts', 300),
    'Wins': ('Wins', 25),
    'Quality_starts': ('Quality_starts', 30),
}
HITTER_PROPS = {
    'Hits': ('xHits', 250),
    'Doubles': ('Doubles', 65),
    'Home Runs': ('Homeruns', 75),
    'RBI': ('RBI', 150),
    'Stolen Bases': ('Stolen_bases', 80),
}


def _pick_teams(all_teams, radio_key, select_key):
    """Render the All / Specific Teams controls and return the chosen teams.

    Args:
        all_teams: Team labels to offer; duplicates are tolerated.
        radio_key: Widget key for the All / Specific radio.
        select_key: Widget key for the team multiselect.

    Returns:
        List of team labels to keep (every team when 'All' is selected).
    """
    scope = st.radio("Would you like to view all teams or specific ones?", ('All', 'Specific Teams'), key=radio_key)
    # The incoming list may carry one entry per player row; offer each team once.
    team_options = list(dict.fromkeys(all_teams))
    if scope == 'Specific Teams':
        return st.multiselect('Which teams would you like to include in the tables?', options=team_options, key=select_key)
    return team_options


def _simulate_stat(proj, stat_col, stat_cap, *, cap_div, std_div, loc_mult, total_sims=5000):
    """Simulate one stat for every player and summarise the outcomes.

    Each player's outcome is drawn ``total_sims`` times from a Gumbel
    distribution with location ``projection * loc_mult`` and scale
    ``projection / std_div``, then clipped at the player's ceiling. The
    ceiling is ``projection * (1 + Ceiling_var)``, replaced by
    ``stat_cap + projection / cap_div`` when it would exceed ``stat_cap``.
    Players are ranked within each simulation (highest outcome = rank 1).

    Args:
        proj: Projection table with 'Name', 'Ceiling_var' and ``stat_col``.
        stat_col: Column holding the projected value of the stat.
        stat_cap: Threshold above which the ceiling is capped.
        cap_div: Divisor of the projection added on top of ``stat_cap``.
        std_div: Divisor turning the projection into the Gumbel scale.
        loc_mult: Multiplier turning the projection into the Gumbel location.
        total_sims: Number of simulated outcomes per player.

    Returns:
        DataFrame ordered by projection (descending) with columns 'Player',
        '10%', 'Projection', '90%', 'Top_finish', 'Top_5_finish' and
        'Top_10_finish'.
    """
    frame = proj.copy()
    frame.replace("", 0, inplace=True)
    frame = frame.sort_values(by=stat_col, ascending=False)

    projection = frame[stat_col].to_numpy(dtype=float)
    uncapped = projection + projection * frame['Ceiling_var'].to_numpy(dtype=float)
    ceiling = np.where(uncapped > stat_cap, stat_cap + projection / cap_div, uncapped)

    # One (players x sims) draw instead of thousands of DataFrame column inserts.
    draws = np.random.gumbel(
        (projection * loc_mult)[:, None],
        (projection / std_div)[:, None],
        size=(len(frame), total_sims),
    )
    outcomes = np.where(draws <= ceiling[:, None], draws, ceiling[:, None])

    # Rank within each simulation (column). Rows stay positional, so players
    # sharing a name are ranked independently.
    ranks = pd.DataFrame(outcomes).rank(ascending=False).to_numpy()

    # Finish rates are counted over the rank matrix only, so summary columns
    # can never leak into the counts.
    return pd.DataFrame({
        'Player': frame['Name'].to_numpy(),
        '10%': np.quantile(outcomes, 0.1, axis=1),
        'Projection': frame[stat_col].to_numpy(),
        '90%': np.quantile(outcomes, 0.9, axis=1),
        'Top_finish': (ranks == 1).sum(axis=1) / total_sims,
        'Top_5_finish': (ranks <= 5).sum(axis=1) / total_sims,
        'Top_10_finish': (ranks <= 10).sum(axis=1) / total_sims,
    })


def _render_simulation(proj, prop_specs, select_key, button_key, **sim_params):
    """Render the prop selector, the run button and, once clicked, the results.

    Args:
        proj: Projection table handed to ``_simulate_stat``.
        prop_specs: Mapping of selectbox label -> (projection column, stat cap).
        select_key: Widget key for the prop selectbox.
        button_key: Widget key for the 'Simulate Stat' button.
        **sim_params: Keyword arguments forwarded to ``_simulate_stat``.
    """
    controls_col, results_col = st.columns([1, 5])

    # Reserve the results slot first so it sits in the wide right-hand column.
    with results_col:
        results_slot = st.empty()

    with controls_col:
        prop_type = st.selectbox('Select type of prop to simulate', options=list(prop_specs), key=select_key)
        run_clicked = st.button('Simulate Stat', key=button_key)

    if run_clicked:
        stat_col, stat_cap = prop_specs[prop_type]
        final_proj = _simulate_stat(proj, stat_col, stat_cap, **sim_params)
        with results_slot.container():
            st.dataframe(final_proj.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(sim_format, precision=2), use_container_width = True)


# Every tab starts with the same "Reset Data" button (distinct widget key per tab).
if selected_tab in RESET_KEYS and st.button("Reset Data", key=RESET_KEYS[selected_tab]):
    st.cache_data.clear()
    # Clear the loader's own cache too: this works whether it is decorated with
    # st.cache_data or st.cache_resource, whereas st.cache_data.clear() alone
    # leaves a cache_resource entry in place.
    init_baselines.clear()
    pitcher_proj, hitter_proj, wins_proj = init_baselines()
    total_teams = pitcher_proj['Team'].values.tolist()

if selected_tab == 'Team Win Projections':
    # The displayed and exported tables are the same frame.
    team_frame = wins_proj[TEAM_COLUMNS].sort_values(by='Proj wins', ascending=False)

    st.dataframe(team_frame.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Team Win Projections",
        data=convert_df_to_csv(team_frame),
        file_name='MLB_team_win_export.csv',
        mime='text/csv',
        key='team_win_export',
    )

elif selected_tab == 'Pitcher Projections':
    chosen_teams = _pick_teams(total_teams, 'split_var1', 'team_var1')

    # The export always contains every pitcher; only the on-screen table is filtered.
    export_frame_sp = pitcher_proj[PITCHER_EXPORT_COLUMNS]
    disp_frame_sp = pitcher_proj[pitcher_proj['Team'].isin(chosen_teams)][PITCHER_DISPLAY_COLUMNS]
    disp_frame_sp = disp_frame_sp.sort_values(by='UD_fpts', ascending=False)
    st.dataframe(disp_frame_sp.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn_r').background_gradient(cmap='RdYlGn', subset=PITCHER_HIGH_IS_GOOD).format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Pitcher Projections",
        data=convert_df_to_csv(export_frame_sp),
        file_name='MLB_pitcher_proj_export.csv',
        mime='text/csv',
        key='pitcher_proj_export',
    )

elif selected_tab == 'Hitter Projections':
    chosen_teams = _pick_teams(total_teams, 'split_var2', 'team_var2')

    # The export always contains every hitter; only the on-screen table is filtered.
    export_frame_h = hitter_proj[HITTER_EXPORT_COLUMNS]
    disp_frame_h = hitter_proj[hitter_proj['Team'].isin(chosen_teams)][HITTER_DISPLAY_COLUMNS]
    disp_frame_h = disp_frame_h.sort_values(by='UD_fpts', ascending=False)
    st.dataframe(disp_frame_h.style.background_gradient(axis=0).background_gradient(cmap='RdYlGn').background_gradient(cmap='RdYlGn_r', subset=['ADP']).format(precision=2), height = 1000, use_container_width = True)

    st.download_button(
        label="Export Hitter Projections",
        data=convert_df_to_csv(export_frame_h),
        file_name='MLB_hitter_proj_export.csv',
        mime='text/csv',
        key='hitter_proj_export',
    )

elif selected_tab == 'Pitcher Simulations':
    _render_simulation(pitcher_proj, PITCHER_PROPS, 'prop_type_var_sp', 'sim_sp',
                       cap_div=10, std_div=3, loc_mult=.75)

elif selected_tab == 'Hitter Simulations':
    _render_simulation(hitter_proj, HITTER_PROPS, 'prop_type_var_h', 'sim_h',
                       cap_div=20, std_div=2, loc_mult=.5)