James McCool
committed on
Commit
·
40a0be8
1
Parent(s):
34bba3a
Add memory optimization functions and implement chunked processing for name matching in app.py
Browse filesIntroduced helper functions for chunked name matching and DataFrame type optimization to enhance memory efficiency. Updated the data loading process to utilize these functions, ensuring better performance and reduced memory usage during portfolio and projections handling.
app.py
CHANGED
|
@@ -105,6 +105,105 @@ st.markdown("""
|
|
| 105 |
|
| 106 |
</style>""", unsafe_allow_html=True)
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 109 |
try:
|
| 110 |
# Remove any numbers from the column name to get the position
|
|
@@ -339,9 +438,20 @@ if selected_tab == 'Data Load':
|
|
| 339 |
else:
|
| 340 |
stack_dict = None
|
| 341 |
if st.session_state['portfolio'] is not None:
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
st.success('Portfolio file loaded successfully!')
|
| 344 |
-
st.session_state['portfolio']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
st.dataframe(st.session_state['portfolio'].head(10))
|
| 346 |
|
| 347 |
with col3:
|
|
@@ -371,6 +481,8 @@ if selected_tab == 'Data Load':
|
|
| 371 |
export_projections, projections = load_file(projections_file, site_var, type_var, sport_var, 'projections')
|
| 372 |
if projections is not None:
|
| 373 |
st.success('Projections file loaded successfully!')
|
|
|
|
|
|
|
| 374 |
try:
|
| 375 |
projections['salary'] = projections['salary'].str.replace(',', '').str.replace('$', '').str.replace(' ', '')
|
| 376 |
st.write('replaced salary symbols')
|
|
@@ -381,16 +493,25 @@ if selected_tab == 'Data Load':
|
|
| 381 |
st.write('replaced ownership symbols')
|
| 382 |
except:
|
| 383 |
pass
|
| 384 |
-
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
| 386 |
if projections['captain ownership'].isna().all():
|
| 387 |
projections['CPT_Own_raw'] = (projections['ownership'] / 2) * ((100 - (100-projections['ownership']))/100)
|
| 388 |
cpt_own_var = 100 / projections['CPT_Own_raw'].sum()
|
| 389 |
projections['captain ownership'] = projections['CPT_Own_raw'] * cpt_own_var
|
| 390 |
projections = projections.drop(columns='CPT_Own_raw', axis=1)
|
| 391 |
-
|
| 392 |
-
projections = projections
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
if position_var is not None:
|
| 395 |
projections['position'] = position_var
|
| 396 |
if team_var is not None:
|
|
@@ -402,7 +523,7 @@ if selected_tab == 'Data Load':
|
|
| 402 |
if st.session_state['portfolio'] is not None and projections is not None:
|
| 403 |
|
| 404 |
st.subheader("Name Matching Analysis")
|
| 405 |
-
|
| 406 |
# Get unique names from portfolio
|
| 407 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 408 |
try:
|
|
@@ -411,78 +532,35 @@ if selected_tab == 'Data Load':
|
|
| 411 |
csv_names = st.session_state['csv_file']['Nickname'].tolist()
|
| 412 |
projection_names = projections['player_names'].tolist()
|
| 413 |
|
| 414 |
-
#
|
| 415 |
-
portfolio_match_dict =
|
| 416 |
-
unmatched_names = []
|
| 417 |
-
for portfolio_name in portfolio_names:
|
| 418 |
-
match = process.extractOne(
|
| 419 |
-
portfolio_name,
|
| 420 |
-
csv_names,
|
| 421 |
-
score_cutoff=87
|
| 422 |
-
)
|
| 423 |
-
if match:
|
| 424 |
-
portfolio_match_dict[portfolio_name] = match[0]
|
| 425 |
-
if match[1] < 100:
|
| 426 |
-
st.write(f"{portfolio_name} matched from portfolio to site csv {match[0]} with a score of {match[1]}%")
|
| 427 |
-
else:
|
| 428 |
-
portfolio_match_dict[portfolio_name] = portfolio_name
|
| 429 |
-
unmatched_names.append(portfolio_name)
|
| 430 |
|
| 431 |
-
# Update portfolio with matched names
|
| 432 |
-
|
| 433 |
-
player_columns = [col for col in portfolio.columns
|
| 434 |
if col not in ['salary', 'median', 'Own']]
|
| 435 |
|
| 436 |
# For each player column, update names using the match dictionary
|
| 437 |
for col in player_columns:
|
| 438 |
-
portfolio[col] = portfolio[col].map(lambda x: portfolio_match_dict.get(x, x))
|
| 439 |
-
st.session_state['portfolio'] = portfolio
|
| 440 |
|
| 441 |
-
# Create match dictionary for
|
| 442 |
-
projections_match_dict =
|
| 443 |
-
unmatched_proj_names = []
|
| 444 |
-
for projections_name in projection_names:
|
| 445 |
-
match = process.extractOne(
|
| 446 |
-
projections_name,
|
| 447 |
-
csv_names,
|
| 448 |
-
score_cutoff=87
|
| 449 |
-
)
|
| 450 |
-
if match:
|
| 451 |
-
projections_match_dict[projections_name] = match[0]
|
| 452 |
-
if match[1] < 100:
|
| 453 |
-
st.write(f"{projections_name} matched from projections to site csv {match[0]} with a score of {match[1]}%")
|
| 454 |
-
else:
|
| 455 |
-
projections_match_dict[projections_name] = projections_name
|
| 456 |
-
unmatched_proj_names.append(projections_name)
|
| 457 |
|
| 458 |
# Update projections with matched names
|
| 459 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
| 460 |
st.session_state['projections_df'] = projections
|
| 461 |
-
|
|
|
|
| 462 |
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
| 463 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 464 |
|
| 465 |
-
|
| 466 |
-
projections_match_dict = {}
|
| 467 |
-
unmatched_proj_names = []
|
| 468 |
-
for projections_name in projection_names:
|
| 469 |
-
match = process.extractOne(
|
| 470 |
-
projections_name,
|
| 471 |
-
portfolio_names,
|
| 472 |
-
score_cutoff=87
|
| 473 |
-
)
|
| 474 |
-
if match:
|
| 475 |
-
projections_match_dict[projections_name] = match[0]
|
| 476 |
-
if match[1] < 100:
|
| 477 |
-
st.write(f"{projections_name} matched from portfolio to projections {match[0]} with a score of {match[1]}%")
|
| 478 |
-
else:
|
| 479 |
-
projections_match_dict[projections_name] = projections_name
|
| 480 |
-
unmatched_proj_names.append(projections_name)
|
| 481 |
|
| 482 |
# Update projections with matched names
|
| 483 |
-
projections['player_names'] = projections['player_names'].map(lambda x:
|
| 484 |
st.session_state['projections_df'] = projections
|
| 485 |
|
|
|
|
| 486 |
if sport_var in stacking_sports:
|
| 487 |
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
| 488 |
st.session_state['portfolio']['Stack'] = st.session_state['portfolio'].apply(
|
|
@@ -502,78 +580,23 @@ if selected_tab == 'Data Load':
|
|
| 502 |
st.session_state['stack_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
|
| 503 |
st.session_state['size_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))
|
| 504 |
|
|
|
|
| 505 |
try:
|
| 506 |
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
|
| 507 |
except:
|
| 508 |
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
|
|
|
|
|
|
|
| 509 |
if 'map_dict' not in st.session_state:
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
st.session_state['map_dict'] = {
|
| 514 |
-
'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
|
| 515 |
-
'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
|
| 516 |
-
'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 517 |
-
'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 518 |
-
'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
|
| 519 |
-
'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
|
| 520 |
-
'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
|
| 521 |
-
'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
|
| 522 |
-
'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
|
| 523 |
-
}
|
| 524 |
-
elif sport_var != 'CS2' and sport_var != 'LOL':
|
| 525 |
-
st.session_state['map_dict'] = {
|
| 526 |
-
'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
|
| 527 |
-
'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
|
| 528 |
-
'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 529 |
-
'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 530 |
-
'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
|
| 531 |
-
'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
|
| 532 |
-
'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 533 |
-
'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
|
| 534 |
-
'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
|
| 535 |
-
}
|
| 536 |
-
elif type_var == 'Showdown':
|
| 537 |
-
if sport_var == 'GOLF':
|
| 538 |
-
st.session_state['map_dict'] = {
|
| 539 |
-
'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
|
| 540 |
-
'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
|
| 541 |
-
'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 542 |
-
'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 543 |
-
'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
|
| 544 |
-
'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
|
| 545 |
-
'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 546 |
-
'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 547 |
-
'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership']))
|
| 548 |
-
}
|
| 549 |
-
if sport_var != 'GOLF':
|
| 550 |
-
st.session_state['map_dict'] = {
|
| 551 |
-
'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
|
| 552 |
-
'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
|
| 553 |
-
'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 554 |
-
'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 555 |
-
'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
|
| 556 |
-
'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
|
| 557 |
-
'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
|
| 558 |
-
'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
|
| 559 |
-
'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
|
| 560 |
-
}
|
| 561 |
-
elif site_var == 'Fanduel':
|
| 562 |
-
st.session_state['map_dict'] = {
|
| 563 |
-
'pos_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['position'])),
|
| 564 |
-
'team_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team'])),
|
| 565 |
-
'salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'])),
|
| 566 |
-
'proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'])),
|
| 567 |
-
'own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'])),
|
| 568 |
-
'own_percent_rank':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['ownership'].rank(pct=True))),
|
| 569 |
-
'cpt_salary_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['salary'] * 1.5)),
|
| 570 |
-
'cpt_proj_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['median'] * 1.5)),
|
| 571 |
-
'cpt_own_map':dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['captain ownership']))
|
| 572 |
-
}
|
| 573 |
st.session_state['origin_portfolio'] = st.session_state['portfolio']
|
| 574 |
buffer = io.BytesIO()
|
| 575 |
st.session_state['portfolio'].to_parquet(buffer, compression='snappy')
|
| 576 |
-
st.session_state['
|
|
|
|
|
|
|
| 577 |
del st.session_state['portfolio'], st.session_state['export_portfolio']
|
| 578 |
|
| 579 |
# with tab2:
|
|
|
|
| 105 |
|
| 106 |
</style>""", unsafe_allow_html=True)
|
| 107 |
|
| 108 |
+
# Memory optimization helper functions
|
| 109 |
+
def chunk_name_matching(portfolio_names, csv_names, chunk_size=1000):
    """Fuzzy-match each portfolio name against the site CSV names.

    Parameters
    ----------
    portfolio_names : list[str]
        Player names taken from the uploaded portfolio.
    csv_names : list[str]
        Candidate names from the site CSV to match against.
    chunk_size : int, optional
        Retained for backward compatibility only. The input list is already
        fully materialized and each fuzzy lookup scans all of ``csv_names``
        regardless, so chunked iteration gave no memory benefit; names are
        simply processed in order.

    Returns
    -------
    tuple[dict, list]
        ``(match_dict, unmatched)`` — ``match_dict`` maps each portfolio name
        to its best CSV match (or to itself when no candidate clears the
        cutoff); ``unmatched`` lists the names that failed the cutoff.
    """
    match_dict = {}
    unmatched = []
    for name in portfolio_names:
        # score_cutoff=87 mirrors the inline matching this helper replaced.
        match = process.extractOne(
            name,
            csv_names,
            score_cutoff=87
        )
        if match:
            match_dict[name] = match[0]
            # Surface imperfect matches so the user can sanity-check them.
            if match[1] < 100:
                st.write(f"{name} matched from portfolio to site csv {match[0]} with a score of {match[1]}%")
        else:
            # No candidate cleared the cutoff: keep the original name as-is.
            match_dict[name] = name
            unmatched.append(name)
    return match_dict, unmatched
|
| 131 |
+
|
| 132 |
+
def optimize_dataframe_dtypes(df):
|
| 133 |
+
"""Optimize DataFrame data types for memory efficiency"""
|
| 134 |
+
for col in df.columns:
|
| 135 |
+
if df[col].dtype == 'object':
|
| 136 |
+
# Try to convert to category if many duplicates
|
| 137 |
+
if df[col].nunique() / len(df) < 0.5:
|
| 138 |
+
df[col] = df[col].astype('category')
|
| 139 |
+
return df
|
| 140 |
+
|
| 141 |
+
def create_memory_efficient_mappings(projections_df, site_var, type_var, sport_var):
    """Build per-player lookup dictionaries from memory-optimized projections.

    Works on a copy of ``projections_df``, downcasts numeric/text columns to
    smaller dtypes, then builds name-keyed maps for position, team, salary,
    projection, ownership, and the captain (CPT/MVP) variants whose
    multipliers depend on site, contest type, and sport.

    Parameters
    ----------
    projections_df : pandas.DataFrame
        Must contain 'player_names' plus the columns being mapped
        ('position', 'team', 'salary', 'median', 'ownership',
        'captain ownership') — assumed present per the caller; TODO confirm.
    site_var : str
        'Draftkings' or 'Fanduel'; any other value yields no captain maps.
    type_var : str
        'Classic' or 'Showdown' (only consulted for Draftkings).
    sport_var : str
        Sport code; 'CS2'/'LOL' and 'GOLF' select special captain rules.

    Returns
    -------
    dict
        Mapping names ('pos_map', 'team_map', 'salary_map', 'proj_map',
        'own_map', 'own_percent_rank', and conditionally 'cpt_salary_map',
        'cpt_proj_map', 'cpt_own_map') to {player_name: value} dicts.
    """
    # Copy so the caller's frame keeps its original dtypes.
    projections_df = projections_df.copy()

    # Downcast to compact dtypes before building the maps.
    for col in ('position', 'team'):
        if col in projections_df.columns:
            projections_df[col] = projections_df[col].astype('category')
    if 'salary' in projections_df.columns:
        projections_df['salary'] = projections_df['salary'].astype('int32')
    for col in ('median', 'ownership', 'captain ownership'):
        if col in projections_df.columns:
            projections_df[col] = projections_df[col].astype('float32')

    names = projections_df['player_names']

    def _name_map(values):
        # One place for the repeated dict(zip(player_names, ...)) pattern.
        return dict(zip(names, values))

    base_mappings = {
        'pos_map': _name_map(projections_df['position']),
        'team_map': _name_map(projections_df['team']),
        'salary_map': _name_map(projections_df['salary']),
        'proj_map': _name_map(projections_df['median']),
        'own_map': _name_map(projections_df['ownership']),
        'own_percent_rank': _name_map(projections_df['ownership'].rank(pct=True).astype('float32')),
    }

    # Captain multipliers: (salary values, projection values, ownership values).
    cpt = None
    if site_var == 'Draftkings':
        if type_var == 'Classic':
            if sport_var in ('CS2', 'LOL'):
                # Esports Classic: 1.5x salary and projection for the captain.
                cpt = (projections_df['salary'] * 1.5,
                       projections_df['median'] * 1.5,
                       projections_df['captain ownership'])
            else:
                # Other Classic sports: captain salary is unchanged.
                cpt = (projections_df['salary'],
                       projections_df['median'] * 1.5,
                       projections_df['captain ownership'])
        elif type_var == 'Showdown':
            if sport_var == 'GOLF':
                # Golf Showdown: no multipliers; flex ownership is reused.
                cpt = (projections_df['salary'],
                       projections_df['median'],
                       projections_df['ownership'])
            else:
                cpt = (projections_df['salary'] * 1.5,
                       projections_df['median'] * 1.5,
                       projections_df['captain ownership'])
    elif site_var == 'Fanduel':
        # Fanduel MVP: 1.5x salary and projection regardless of type/sport.
        cpt = (projections_df['salary'] * 1.5,
               projections_df['median'] * 1.5,
               projections_df['captain ownership'])

    if cpt is not None:
        cpt_salary_vals, cpt_proj_vals, cpt_own_vals = cpt
        base_mappings.update({
            'cpt_salary_map': _name_map(cpt_salary_vals),
            'cpt_proj_map': _name_map(cpt_proj_vals),
            'cpt_own_map': _name_map(cpt_own_vals),
        })

    return base_mappings
|
| 206 |
+
|
| 207 |
def create_position_export_dict(column_name, csv_file, site_var, type_var, sport_var):
|
| 208 |
try:
|
| 209 |
# Remove any numbers from the column name to get the position
|
|
|
|
| 438 |
else:
|
| 439 |
stack_dict = None
|
| 440 |
if st.session_state['portfolio'] is not None:
|
| 441 |
+
|
| 442 |
+
# Optimize data types early for memory efficiency
|
| 443 |
+
st.session_state['portfolio'] = optimize_dataframe_dtypes(st.session_state['portfolio'])
|
| 444 |
|
| 445 |
st.success('Portfolio file loaded successfully!')
|
| 446 |
+
for col in st.session_state['portfolio'].select_dtypes(include=['object', 'category']).columns:
|
| 447 |
+
if st.session_state['portfolio'][col].dtype == 'category':
|
| 448 |
+
# Handle categorical columns
|
| 449 |
+
st.session_state['portfolio'][col] = st.session_state['portfolio'][col].cat.rename_categories(
|
| 450 |
+
lambda x: player_right_names_mlb.get(x, x) if x in player_wrong_names_mlb else x
|
| 451 |
+
)
|
| 452 |
+
else:
|
| 453 |
+
# Handle object columns
|
| 454 |
+
st.session_state['portfolio'][col] = st.session_state['portfolio'][col].replace(player_wrong_names_mlb)
|
| 455 |
st.dataframe(st.session_state['portfolio'].head(10))
|
| 456 |
|
| 457 |
with col3:
|
|
|
|
| 481 |
export_projections, projections = load_file(projections_file, site_var, type_var, sport_var, 'projections')
|
| 482 |
if projections is not None:
|
| 483 |
st.success('Projections file loaded successfully!')
|
| 484 |
+
|
| 485 |
+
# Optimize projections data types early
|
| 486 |
try:
|
| 487 |
projections['salary'] = projections['salary'].str.replace(',', '').str.replace('$', '').str.replace(' ', '')
|
| 488 |
st.write('replaced salary symbols')
|
|
|
|
| 493 |
st.write('replaced ownership symbols')
|
| 494 |
except:
|
| 495 |
pass
|
| 496 |
+
|
| 497 |
+
# Convert to efficient data types
|
| 498 |
+
projections['salary'] = projections['salary'].dropna().astype('int32')
|
| 499 |
+
projections['ownership'] = projections['ownership'].astype('float32')
|
| 500 |
+
|
| 501 |
if projections['captain ownership'].isna().all():
|
| 502 |
projections['CPT_Own_raw'] = (projections['ownership'] / 2) * ((100 - (100-projections['ownership']))/100)
|
| 503 |
cpt_own_var = 100 / projections['CPT_Own_raw'].sum()
|
| 504 |
projections['captain ownership'] = projections['CPT_Own_raw'] * cpt_own_var
|
| 505 |
projections = projections.drop(columns='CPT_Own_raw', axis=1)
|
| 506 |
+
|
| 507 |
+
projections['captain ownership'] = projections['captain ownership'].astype('float32')
|
| 508 |
+
projections['median'] = projections['median'].astype('float32')
|
| 509 |
+
|
| 510 |
+
# More efficient string replacement for projections
|
| 511 |
+
for col in projections.select_dtypes(include=['object']).columns:
|
| 512 |
+
projections[col] = projections[col].replace(player_wrong_names_mlb)
|
| 513 |
+
|
| 514 |
+
# Set position/team variables if needed
|
| 515 |
if position_var is not None:
|
| 516 |
projections['position'] = position_var
|
| 517 |
if team_var is not None:
|
|
|
|
| 523 |
if st.session_state['portfolio'] is not None and projections is not None:
|
| 524 |
|
| 525 |
st.subheader("Name Matching Analysis")
|
| 526 |
+
|
| 527 |
# Get unique names from portfolio
|
| 528 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 529 |
try:
|
|
|
|
| 532 |
csv_names = st.session_state['csv_file']['Nickname'].tolist()
|
| 533 |
projection_names = projections['player_names'].tolist()
|
| 534 |
|
| 535 |
+
# Use chunked name matching for memory efficiency
|
| 536 |
+
portfolio_match_dict, unmatched_names = chunk_name_matching(portfolio_names, csv_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
|
| 538 |
+
# Update portfolio with matched names (in-place to save memory)
|
| 539 |
+
player_columns = [col for col in st.session_state['portfolio'].columns
|
|
|
|
| 540 |
if col not in ['salary', 'median', 'Own']]
|
| 541 |
|
| 542 |
# For each player column, update names using the match dictionary
|
| 543 |
for col in player_columns:
|
| 544 |
+
st.session_state['portfolio'][col] = st.session_state['portfolio'][col].map(lambda x: portfolio_match_dict.get(x, x))
|
|
|
|
| 545 |
|
| 546 |
+
# Create match dictionary for projections to CSV names (chunked)
|
| 547 |
+
projections_match_dict, unmatched_proj_names = chunk_name_matching(projection_names, csv_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
|
| 549 |
# Update projections with matched names
|
| 550 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
| 551 |
st.session_state['projections_df'] = projections
|
| 552 |
+
|
| 553 |
+
# Second round of matching (projections to portfolio)
|
| 554 |
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
| 555 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 556 |
|
| 557 |
+
projections_match_dict2, unmatched_proj_names2 = chunk_name_matching(projection_names, portfolio_names)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 558 |
|
| 559 |
# Update projections with matched names
|
| 560 |
+
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict2.get(x, x))
|
| 561 |
st.session_state['projections_df'] = projections
|
| 562 |
|
| 563 |
+
# Handle stacking if needed
|
| 564 |
if sport_var in stacking_sports:
|
| 565 |
team_dict = dict(zip(st.session_state['projections_df']['player_names'], st.session_state['projections_df']['team']))
|
| 566 |
st.session_state['portfolio']['Stack'] = st.session_state['portfolio'].apply(
|
|
|
|
| 580 |
st.session_state['stack_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Stack']))
|
| 581 |
st.session_state['size_dict'] = dict(zip(st.session_state['portfolio'].index, st.session_state['portfolio']['Size']))
|
| 582 |
|
| 583 |
+
# Create export dictionary
|
| 584 |
try:
|
| 585 |
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Name'], st.session_state['csv_file']['Name + ID']))
|
| 586 |
except:
|
| 587 |
st.session_state['export_dict'] = dict(zip(st.session_state['csv_file']['Nickname'], st.session_state['csv_file']['Id']))
|
| 588 |
+
|
| 589 |
+
# Create memory-efficient mappings
|
| 590 |
if 'map_dict' not in st.session_state:
|
| 591 |
+
st.session_state['map_dict'] = create_memory_efficient_mappings(st.session_state['projections_df'], site_var, type_var, sport_var)
|
| 592 |
+
|
| 593 |
+
# Store portfolio in compressed format and clean up
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 594 |
st.session_state['origin_portfolio'] = st.session_state['portfolio']
|
| 595 |
buffer = io.BytesIO()
|
| 596 |
st.session_state['portfolio'].to_parquet(buffer, compression='snappy')
|
| 597 |
+
st.session_state['origin_portfolio_compressed'] = buffer.getvalue()
|
| 598 |
+
|
| 599 |
+
# Clear large objects from session state to free memory
|
| 600 |
del st.session_state['portfolio'], st.session_state['export_portfolio']
|
| 601 |
|
| 602 |
# with tab2:
|