Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
6bcbd26
1
Parent(s):
9c5865c
Add player validation function for lineups
Browse files
Introduced a new function, validate_lineup_players, to ensure that all players in specified columns exist within a set of valid players. This function is applied to both DraftKings and FanDuel lineups, enhancing data integrity by filtering out invalid entries before further processing.
app.py
CHANGED
|
@@ -173,6 +173,22 @@ def init_baselines():
|
|
| 173 |
|
| 174 |
return dk_raw, fd_raw, dk_secondary, fd_secondary
|
| 175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
@st.cache_data
|
| 177 |
def convert_df(array):
|
| 178 |
array = pd.DataFrame(array, columns=column_names)
|
|
@@ -245,6 +261,11 @@ with tab1:
|
|
| 245 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
| 246 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
| 247 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
|
| 249 |
sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
|
| 250 |
|
|
@@ -307,11 +328,15 @@ with tab1:
|
|
| 307 |
if sim_slate_var1 == 'Main Slate':
|
| 308 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
| 309 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
|
|
|
|
|
|
| 310 |
raw_baselines = dk_raw
|
| 311 |
column_names = dk_columns
|
| 312 |
elif sim_slate_var1 == 'Secondary Slate':
|
| 313 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
| 314 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
|
|
|
|
|
|
| 315 |
raw_baselines = dk_secondary
|
| 316 |
column_names = dk_columns
|
| 317 |
|
|
@@ -319,11 +344,15 @@ with tab1:
|
|
| 319 |
if sim_slate_var1 == 'Main Slate':
|
| 320 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
| 321 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
|
|
|
|
|
|
| 322 |
raw_baselines = fd_raw
|
| 323 |
column_names = fd_columns
|
| 324 |
elif sim_slate_var1 == 'Secondary Slate':
|
| 325 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
| 326 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
|
|
|
|
|
|
| 327 |
raw_baselines = fd_secondary
|
| 328 |
column_names = fd_columns
|
| 329 |
|
|
@@ -665,12 +694,14 @@ with tab2:
|
|
| 665 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
| 666 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
| 667 |
|
|
|
|
| 668 |
raw_baselines = dk_raw
|
| 669 |
column_names = dk_columns
|
| 670 |
elif slate_var1 == 'Secondary Slate':
|
| 671 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
| 672 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
| 673 |
|
|
|
|
| 674 |
raw_baselines = dk_secondary
|
| 675 |
column_names = dk_columns
|
| 676 |
|
|
@@ -694,12 +725,14 @@ with tab2:
|
|
| 694 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
| 695 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
| 696 |
|
|
|
|
| 697 |
raw_baselines = fd_raw
|
| 698 |
column_names = fd_columns
|
| 699 |
elif slate_var1 == 'Secondary Slate':
|
| 700 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
| 701 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
| 702 |
-
|
|
|
|
| 703 |
raw_baselines = fd_secondary
|
| 704 |
column_names = fd_columns
|
| 705 |
|
|
|
|
| 173 |
|
| 174 |
return dk_raw, fd_raw, dk_secondary, fd_secondary
|
| 175 |
|
| 176 |
+
@st.cache_data
def validate_lineup_players(df, valid_players, player_columns):
    """
    Keep only the lineups whose players are all known.

    A row of ``df`` is retained when the value in every column listed in
    ``player_columns`` is a member of ``valid_players``; any row with at
    least one unknown value in those columns is dropped.

    Args:
        df: DataFrame containing lineups, one lineup per row
        valid_players: Collection of accepted player names (callers pass a set)
        player_columns: List of the columns in ``df`` that hold player names

    Returns:
        The rows of ``df`` for which every listed column passed the check
    """
    def _is_known(slot):
        # Element-wise membership test for a single roster column.
        return slot.isin(valid_players)

    roster_slots = df[player_columns]
    membership = roster_slots.apply(_is_known)
    # A lineup survives only if every one of its slots holds a known player.
    keep_mask = membership.all(axis=1)
    return df[keep_mask]
|
| 191 |
+
|
| 192 |
@st.cache_data
|
| 193 |
def convert_df(array):
|
| 194 |
array = pd.DataFrame(array, columns=column_names)
|
|
|
|
| 261 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
| 262 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
| 263 |
|
| 264 |
+
DK_seed = validate_lineup_players(DK_seed, set(dk_raw.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 265 |
+
FD_seed = validate_lineup_players(FD_seed, set(fd_raw.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 266 |
+
DK_secondary = validate_lineup_players(DK_secondary, set(dk_secondary.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 267 |
+
FD_secondary = validate_lineup_players(FD_secondary, set(fd_secondary.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 268 |
+
|
| 269 |
sim_slate_var1 = st.radio("Which data are you loading?", ('Main Slate', 'Secondary Slate'), key='sim_slate_var1')
|
| 270 |
sim_site_var1 = st.radio("What site are you working with?", ('Draftkings', 'Fanduel'), key='sim_site_var1')
|
| 271 |
|
|
|
|
| 328 |
if sim_slate_var1 == 'Main Slate':
|
| 329 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split)
|
| 330 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
| 331 |
+
|
| 332 |
+
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 333 |
raw_baselines = dk_raw
|
| 334 |
column_names = dk_columns
|
| 335 |
elif sim_slate_var1 == 'Secondary Slate':
|
| 336 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split)
|
| 337 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
| 338 |
+
|
| 339 |
+
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 340 |
raw_baselines = dk_secondary
|
| 341 |
column_names = dk_columns
|
| 342 |
|
|
|
|
| 344 |
if sim_slate_var1 == 'Main Slate':
|
| 345 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split)
|
| 346 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
| 347 |
+
|
| 348 |
+
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 349 |
raw_baselines = fd_raw
|
| 350 |
column_names = fd_columns
|
| 351 |
elif sim_slate_var1 == 'Secondary Slate':
|
| 352 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split)
|
| 353 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
| 354 |
+
|
| 355 |
+
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 356 |
raw_baselines = fd_secondary
|
| 357 |
column_names = fd_columns
|
| 358 |
|
|
|
|
| 694 |
st.session_state.working_seed = init_DK_seed_frames(sharp_split_var)
|
| 695 |
dk_id_dict = dict(zip(dk_raw.Player, dk_raw.player_id))
|
| 696 |
|
| 697 |
+
st.session_state.dk_raw = validate_lineup_players(dk_raw, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 698 |
raw_baselines = dk_raw
|
| 699 |
column_names = dk_columns
|
| 700 |
elif slate_var1 == 'Secondary Slate':
|
| 701 |
st.session_state.working_seed = init_DK_secondary_seed_frames(sharp_split_var)
|
| 702 |
dk_id_dict = dict(zip(dk_secondary.Player, dk_secondary.player_id))
|
| 703 |
|
| 704 |
+
st.session_state.dk_secondary = validate_lineup_players(dk_secondary, set(st.session_state.working_seed.Player), ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'FLEX'])
|
| 705 |
raw_baselines = dk_secondary
|
| 706 |
column_names = dk_columns
|
| 707 |
|
|
|
|
| 725 |
st.session_state.working_seed = init_FD_seed_frames(sharp_split_var)
|
| 726 |
fd_id_dict = dict(zip(fd_raw.Player, fd_raw.player_id))
|
| 727 |
|
| 728 |
+
st.session_state.fd_raw = validate_lineup_players(fd_raw, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 729 |
raw_baselines = fd_raw
|
| 730 |
column_names = fd_columns
|
| 731 |
elif slate_var1 == 'Secondary Slate':
|
| 732 |
st.session_state.working_seed = init_FD_secondary_seed_frames(sharp_split_var)
|
| 733 |
fd_id_dict = dict(zip(fd_secondary.Player, fd_secondary.player_id))
|
| 734 |
+
|
| 735 |
+
st.session_state.fd_secondary = validate_lineup_players(fd_secondary, set(st.session_state.working_seed.Player), ['PG1', 'PG2', 'SG1', 'SG2', 'SF1', 'SF2', 'PF1', 'PF2', 'C1'])
|
| 736 |
raw_baselines = fd_secondary
|
| 737 |
column_names = fd_columns
|
| 738 |
|