Spaces:
Sleeping
Sleeping
James McCool
committed on
Commit
·
7400821
1
Parent(s):
0f5f58e
Implement player name standardization in app.py to enhance data consistency across simulations. Added a new function to remove common suffixes from player names and updated relevant mappings to apply this standardization. This change improves the accuracy of player data handling for contest simulations on DraftKings and FanDuel.
Browse files
app.py
CHANGED
|
@@ -192,6 +192,15 @@ def init_baselines(sport):
|
|
| 192 |
|
| 193 |
return dk_raw, fd_raw
|
| 194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
@st.cache_data
|
| 196 |
def convert_df(array):
|
| 197 |
array = pd.DataFrame(array, columns=column_names)
|
|
@@ -216,12 +225,14 @@ def sim_contest(Sim_size, seed_frame, maps_dict, sharp_split, Contest_Size):
|
|
| 216 |
SimVar = 1
|
| 217 |
Sim_Winners = []
|
| 218 |
fp_array = seed_frame[:sharp_split, :]
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# Pre-vectorize functions
|
| 221 |
-
vec_projection_map = np.vectorize(maps_dict['Projection_map'].__getitem__)
|
| 222 |
-
vec_cpt_projection_map = np.vectorize(maps_dict['cpt_projection_map'].__getitem__)
|
| 223 |
-
vec_stdev_map = np.vectorize(maps_dict['STDev_map'].__getitem__)
|
| 224 |
-
vec_cpt_stdev_map = np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)
|
| 225 |
|
| 226 |
st.write('Simulating contest on frames')
|
| 227 |
|
|
@@ -432,15 +443,15 @@ with tab1:
|
|
| 432 |
if st.button("Run Contest Sim"):
|
| 433 |
if 'working_seed' in st.session_state:
|
| 434 |
maps_dict = {
|
| 435 |
-
'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
|
| 436 |
-
'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
|
| 437 |
-
'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
|
| 438 |
-
'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
|
| 439 |
-
'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
|
| 440 |
-
'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
|
| 441 |
-
'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
|
| 442 |
-
'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
|
| 443 |
-
'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
|
| 444 |
}
|
| 445 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
| 446 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
@@ -473,15 +484,15 @@ with tab1:
|
|
| 473 |
elif sim_site_var1 == 'Fanduel':
|
| 474 |
st.session_state.working_seed = FD_seed.copy()
|
| 475 |
maps_dict = {
|
| 476 |
-
'Projection_map':dict(zip(raw_baselines.Player,raw_baselines.Median)),
|
| 477 |
-
'cpt_projection_map':dict(zip(raw_baselines.Player,raw_baselines.cpt_Median)),
|
| 478 |
-
'Salary_map':dict(zip(raw_baselines.Player,raw_baselines.Salary)),
|
| 479 |
-
'Pos_map':dict(zip(raw_baselines.Player,raw_baselines.Position)),
|
| 480 |
-
'Own_map':dict(zip(raw_baselines.Player,raw_baselines['Own'])),
|
| 481 |
-
'cpt_Own_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_Own'])),
|
| 482 |
-
'Team_map':dict(zip(raw_baselines.Player,raw_baselines.Team)),
|
| 483 |
-
'STDev_map':dict(zip(raw_baselines.Player,raw_baselines.STDev)),
|
| 484 |
-
'cpt_STDev_map':dict(zip(raw_baselines.Player,raw_baselines['CPT_STDev']))
|
| 485 |
}
|
| 486 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
| 487 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
@@ -517,18 +528,18 @@ with tab1:
|
|
| 517 |
freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
|
| 518 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
| 519 |
freq_working['Freq'] = freq_working['Freq'].astype(int)
|
| 520 |
-
freq_working['Position'] = freq_working['Player'].map(maps_dict['Pos_map'])
|
| 521 |
if sim_site_var1 == 'Draftkings':
|
| 522 |
if sim_sport_var1 == 'NFL':
|
| 523 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map']) / 1.5
|
| 524 |
elif sim_sport_var1 == 'NBA':
|
| 525 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
|
| 526 |
elif sim_site_var1 == 'Fanduel':
|
| 527 |
-
freq_working['Salary'] = freq_working['Player'].map(maps_dict['Salary_map'])
|
| 528 |
-
freq_working['Proj Own'] = freq_working['Player'].map(maps_dict['Own_map']) / 100
|
| 529 |
freq_working['Exposure'] = freq_working['Freq']/(1000)
|
| 530 |
freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
|
| 531 |
-
freq_working['Team'] = freq_working['Player'].map(maps_dict['Team_map'])
|
| 532 |
st.session_state.player_freq = freq_working.copy()
|
| 533 |
|
| 534 |
if sim_site_var1 == 'Draftkings':
|
|
@@ -538,15 +549,15 @@ with tab1:
|
|
| 538 |
cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
|
| 539 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
| 540 |
cpt_working['Freq'] = cpt_working['Freq'].astype(int)
|
| 541 |
-
cpt_working['Position'] = cpt_working['Player'].map(maps_dict['Pos_map'])
|
| 542 |
if sim_sport_var1 == 'NFL':
|
| 543 |
-
cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map'])
|
| 544 |
elif sim_sport_var1 == 'NBA':
|
| 545 |
-
cpt_working['Salary'] = cpt_working['Player'].map(maps_dict['Salary_map']) * 1.5
|
| 546 |
-
cpt_working['Proj Own'] = cpt_working['Player'].map(maps_dict['cpt_Own_map']) / 100
|
| 547 |
cpt_working['Exposure'] = cpt_working['Freq']/(1000)
|
| 548 |
cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
|
| 549 |
-
cpt_working['Team'] = cpt_working['Player'].map(maps_dict['Team_map'])
|
| 550 |
st.session_state.sp_freq = cpt_working.copy()
|
| 551 |
|
| 552 |
if sim_site_var1 == 'Draftkings':
|
|
@@ -561,15 +572,15 @@ with tab1:
|
|
| 561 |
flex_working['Position'] = flex_working['Player'].map(maps_dict['Pos_map'])
|
| 562 |
if sim_site_var1 == 'Draftkings':
|
| 563 |
if sim_sport_var1 == 'NFL':
|
| 564 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map']) / 1.5
|
| 565 |
elif sim_sport_var1 == 'NBA':
|
| 566 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
|
| 567 |
elif sim_site_var1 == 'Fanduel':
|
| 568 |
-
flex_working['Salary'] = flex_working['Player'].map(maps_dict['Salary_map'])
|
| 569 |
-
flex_working['Proj Own'] = (flex_working['Player'].map(maps_dict['Own_map']) / 100) - (flex_working['Player'].map(maps_dict['cpt_Own_map']) / 100)
|
| 570 |
flex_working['Exposure'] = flex_working['Freq']/(1000)
|
| 571 |
flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
|
| 572 |
-
flex_working['Team'] = flex_working['Player'].map(maps_dict['Team_map'])
|
| 573 |
st.session_state.flex_freq = flex_working.copy()
|
| 574 |
|
| 575 |
if sim_site_var1 == 'Draftkings':
|
|
|
|
| 192 |
|
| 193 |
return dk_raw, fd_raw
|
| 194 |
|
| 195 |
+
def standardize_name(name):
    """Return *name* as a stripped string with trailing generational suffixes removed.

    Only suffixes at the END of the name are dropped (for example
    "Kelly Oubre Jr." -> "Kelly Oubre"). Text inside the name that merely
    looks like a suffix is left untouched, so "John IVerson" is not turned
    into "Johnerson" the way a blanket ``str.replace`` would.

    Args:
        name: Player name; any non-string value is converted with ``str()``.

    Returns:
        The name without surrounding whitespace and without any trailing
        " Jr.", " Jr", " Sr.", " Sr", " III", " II" or " IV" suffix.
    """
    # NOTE: intentionally not wrapped in st.cache_data. This helper is called
    # once per element (Series.apply / np.vectorize), and the work is cheap
    # enough that per-call cache hashing would cost more than it saves.
    suffixes = (' Jr.', ' Jr', ' Sr.', ' Sr', ' III', ' II', ' IV')
    cleaned = str(name).strip()  # Ensure name is a string

    # Keep peeling until no suffix remains, so stacked suffixes such as
    # "Robert Griffin III Jr." are fully removed.
    removed = True
    while removed:
        removed = False
        for suffix in suffixes:
            if cleaned.endswith(suffix):
                cleaned = cleaned[:-len(suffix)].rstrip()
                removed = True
                break
    return cleaned
|
| 203 |
+
|
| 204 |
@st.cache_data
|
| 205 |
def convert_df(array):
|
| 206 |
array = pd.DataFrame(array, columns=column_names)
|
|
|
|
| 225 |
SimVar = 1
|
| 226 |
Sim_Winners = []
|
| 227 |
fp_array = seed_frame[:sharp_split, :]
|
| 228 |
+
|
| 229 |
+
vec_standardize = np.vectorize(standardize_name)
|
| 230 |
|
| 231 |
# Pre-vectorize functions
|
| 232 |
+
vec_projection_map = lambda x: np.vectorize(maps_dict['Projection_map'].__getitem__)(vec_standardize(x))
|
| 233 |
+
vec_cpt_projection_map = lambda x: np.vectorize(maps_dict['cpt_projection_map'].__getitem__)(vec_standardize(x))
|
| 234 |
+
vec_stdev_map = lambda x: np.vectorize(maps_dict['STDev_map'].__getitem__)(vec_standardize(x))
|
| 235 |
+
vec_cpt_stdev_map = lambda x: np.vectorize(maps_dict['cpt_STDev_map'].__getitem__)(vec_standardize(x))
|
| 236 |
|
| 237 |
st.write('Simulating contest on frames')
|
| 238 |
|
|
|
|
| 443 |
if st.button("Run Contest Sim"):
|
| 444 |
if 'working_seed' in st.session_state:
|
| 445 |
maps_dict = {
|
| 446 |
+
'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
|
| 447 |
+
'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
|
| 448 |
+
'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
|
| 449 |
+
'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
|
| 450 |
+
'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
|
| 451 |
+
'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
|
| 452 |
+
'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
|
| 453 |
+
'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
|
| 454 |
+
'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
|
| 455 |
}
|
| 456 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
| 457 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
|
|
| 484 |
elif sim_site_var1 == 'Fanduel':
|
| 485 |
st.session_state.working_seed = FD_seed.copy()
|
| 486 |
maps_dict = {
|
| 487 |
+
'Projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Median)),
|
| 488 |
+
'cpt_projection_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.cpt_Median)),
|
| 489 |
+
'Salary_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Salary)),
|
| 490 |
+
'Pos_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Position)),
|
| 491 |
+
'Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['Own'])),
|
| 492 |
+
'cpt_Own_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_Own'])),
|
| 493 |
+
'Team_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.Team)),
|
| 494 |
+
'STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines.STDev)),
|
| 495 |
+
'cpt_STDev_map':dict(zip(raw_baselines.Player.apply(standardize_name),raw_baselines['CPT_STDev']))
|
| 496 |
}
|
| 497 |
Sim_Winners = sim_contest(1000, st.session_state.working_seed, maps_dict, sharp_split, Contest_Size)
|
| 498 |
Sim_Winner_Frame = pd.DataFrame(np.concatenate(Sim_Winners))
|
|
|
|
| 528 |
freq_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:5].values, return_counts=True)),
|
| 529 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
| 530 |
freq_working['Freq'] = freq_working['Freq'].astype(int)
|
| 531 |
+
# Look up positions by standardized name (maps_dict keys are built from standardized names); one pass is enough.
freq_working['Position'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
| 532 |
if sim_site_var1 == 'Draftkings':
|
| 533 |
if sim_sport_var1 == 'NFL':
|
| 534 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
|
| 535 |
elif sim_sport_var1 == 'NBA':
|
| 536 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
| 537 |
elif sim_site_var1 == 'Fanduel':
|
| 538 |
+
freq_working['Salary'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
| 539 |
+
freq_working['Proj Own'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100
|
| 540 |
freq_working['Exposure'] = freq_working['Freq']/(1000)
|
| 541 |
freq_working['Edge'] = freq_working['Exposure'] - freq_working['Proj Own']
|
| 542 |
+
freq_working['Team'] = freq_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
| 543 |
st.session_state.player_freq = freq_working.copy()
|
| 544 |
|
| 545 |
if sim_site_var1 == 'Draftkings':
|
|
|
|
| 549 |
cpt_working = pd.DataFrame(np.column_stack(np.unique(freq_copy.iloc[:,0:1].values, return_counts=True)),
|
| 550 |
columns=['Player','Freq']).sort_values('Freq', ascending=False).reset_index(drop=True)
|
| 551 |
cpt_working['Freq'] = cpt_working['Freq'].astype(int)
|
| 552 |
+
cpt_working['Position'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
| 553 |
if sim_sport_var1 == 'NFL':
|
| 554 |
+
cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
| 555 |
elif sim_sport_var1 == 'NBA':
|
| 556 |
+
cpt_working['Salary'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) * 1.5
|
| 557 |
+
cpt_working['Proj Own'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100
|
| 558 |
cpt_working['Exposure'] = cpt_working['Freq']/(1000)
|
| 559 |
cpt_working['Edge'] = cpt_working['Exposure'] - cpt_working['Proj Own']
|
| 560 |
+
cpt_working['Team'] = cpt_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
| 561 |
st.session_state.sp_freq = cpt_working.copy()
|
| 562 |
|
| 563 |
if sim_site_var1 == 'Draftkings':
|
|
|
|
| 572 |
# Standardize names before the lookup: Pos_map is keyed by standardized names, so raw suffixed names would miss and map to NaN.
flex_working['Position'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Pos_map'])
|
| 573 |
if sim_site_var1 == 'Draftkings':
|
| 574 |
if sim_sport_var1 == 'NFL':
|
| 575 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map']) / 1.5
|
| 576 |
elif sim_sport_var1 == 'NBA':
|
| 577 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
| 578 |
elif sim_site_var1 == 'Fanduel':
|
| 579 |
+
flex_working['Salary'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Salary_map'])
|
| 580 |
+
flex_working['Proj Own'] = (flex_working['Player'].apply(standardize_name).map(maps_dict['Own_map']) / 100) - (flex_working['Player'].apply(standardize_name).map(maps_dict['cpt_Own_map']) / 100)
|
| 581 |
flex_working['Exposure'] = flex_working['Freq']/(1000)
|
| 582 |
flex_working['Edge'] = flex_working['Exposure'] - flex_working['Proj Own']
|
| 583 |
+
flex_working['Team'] = flex_working['Player'].apply(standardize_name).map(maps_dict['Team_map'])
|
| 584 |
st.session_state.flex_freq = flex_working.copy()
|
| 585 |
|
| 586 |
if sim_site_var1 == 'Draftkings':
|