James McCool
committed on
Commit
·
3111836
1
Parent(s):
cf86397
Fix an odd issue where a name was being matched to another name unnecessarily
Browse files
app.py
CHANGED
|
@@ -162,6 +162,8 @@ sport_position_lists = {
|
|
| 162 |
|
| 163 |
showdown_position_lists = ['CPT', 'FLEX']
|
| 164 |
|
|
|
|
|
|
|
| 165 |
player_wrong_names_mlb = ['Enrique Hernandez', 'Joseph Cantillo', 'Mike Soroka', 'Jakob Bauers', 'Temi Fágbénlé']
|
| 166 |
player_right_names_mlb = ['Kike Hernandez', 'Joey Cantillo', 'Michael Soroka', 'Jake Bauers', 'Temi Fagbenle']
|
| 167 |
|
|
@@ -599,7 +601,7 @@ except:
|
|
| 599 |
nhl_slate_name_lookup_fd = {}
|
| 600 |
|
| 601 |
# Memory optimization helper functions
|
| 602 |
-
def chunk_name_matching(portfolio_names, csv_names, chunk_size=1000):
|
| 603 |
"""Process name matching in chunks to reduce memory usage"""
|
| 604 |
portfolio_match_dict = {}
|
| 605 |
unmatched_names = []
|
|
@@ -607,11 +609,15 @@ def chunk_name_matching(portfolio_names, csv_names, chunk_size=1000):
|
|
| 607 |
for i in range(0, len(portfolio_names), chunk_size):
|
| 608 |
chunk = portfolio_names[i:i+chunk_size]
|
| 609 |
for portfolio_name in chunk:
|
| 610 |
-
|
| 611 |
-
portfolio_name
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
if match:
|
| 616 |
portfolio_match_dict[portfolio_name] = match[0]
|
| 617 |
if match[1] < 100:
|
|
@@ -1689,7 +1695,7 @@ if selected_tab == 'Data Load':
|
|
| 1689 |
csv_names = st.session_state['csv_file']['Nickname'].tolist()
|
| 1690 |
projection_names = projections['player_names'].tolist()
|
| 1691 |
|
| 1692 |
-
portfolio_match_dict, unmatched_names = chunk_name_matching(portfolio_names, csv_names)
|
| 1693 |
|
| 1694 |
player_columns = [col for col in st.session_state['portfolio'].columns
|
| 1695 |
if col not in ['salary', 'median', 'Own']]
|
|
@@ -1697,7 +1703,7 @@ if selected_tab == 'Data Load':
|
|
| 1697 |
for col in player_columns:
|
| 1698 |
st.session_state['portfolio'][col] = st.session_state['portfolio'][col].map(lambda x: portfolio_match_dict.get(x, x))
|
| 1699 |
|
| 1700 |
-
projections_match_dict, unmatched_proj_names = chunk_name_matching(projection_names, csv_names)
|
| 1701 |
|
| 1702 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
| 1703 |
st.session_state['projections_df'] = projections
|
|
@@ -1705,7 +1711,7 @@ if selected_tab == 'Data Load':
|
|
| 1705 |
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
| 1706 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 1707 |
|
| 1708 |
-
projections_match_dict2, unmatched_proj_names2 = chunk_name_matching(projection_names, portfolio_names)
|
| 1709 |
|
| 1710 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict2.get(x, x))
|
| 1711 |
st.session_state['projections_df'] = projections
|
|
|
|
| 162 |
|
| 163 |
showdown_position_lists = ['CPT', 'FLEX']
|
| 164 |
|
| 165 |
+
ignore_rename = ['Nikola Jokic']
|
| 166 |
+
|
| 167 |
player_wrong_names_mlb = ['Enrique Hernandez', 'Joseph Cantillo', 'Mike Soroka', 'Jakob Bauers', 'Temi Fágbénlé']
|
| 168 |
player_right_names_mlb = ['Kike Hernandez', 'Joey Cantillo', 'Michael Soroka', 'Jake Bauers', 'Temi Fagbenle']
|
| 169 |
|
|
|
|
| 601 |
nhl_slate_name_lookup_fd = {}
|
| 602 |
|
| 603 |
# Memory optimization helper functions
|
| 604 |
+
def chunk_name_matching(portfolio_names, csv_names, ignore_rename, chunk_size=1000):
|
| 605 |
"""Process name matching in chunks to reduce memory usage"""
|
| 606 |
portfolio_match_dict = {}
|
| 607 |
unmatched_names = []
|
|
|
|
| 609 |
for i in range(0, len(portfolio_names), chunk_size):
|
| 610 |
chunk = portfolio_names[i:i+chunk_size]
|
| 611 |
for portfolio_name in chunk:
|
| 612 |
+
if portfolio_name in ignore_rename:
|
| 613 |
+
portfolio_match_dict[portfolio_name] = portfolio_name
|
| 614 |
+
continue
|
| 615 |
+
else:
|
| 616 |
+
match = process.extractOne(
|
| 617 |
+
portfolio_name,
|
| 618 |
+
csv_names,
|
| 619 |
+
score_cutoff=90
|
| 620 |
+
)
|
| 621 |
if match:
|
| 622 |
portfolio_match_dict[portfolio_name] = match[0]
|
| 623 |
if match[1] < 100:
|
|
|
|
| 1695 |
csv_names = st.session_state['csv_file']['Nickname'].tolist()
|
| 1696 |
projection_names = projections['player_names'].tolist()
|
| 1697 |
|
| 1698 |
+
portfolio_match_dict, unmatched_names = chunk_name_matching(portfolio_names, csv_names, ignore_rename)
|
| 1699 |
|
| 1700 |
player_columns = [col for col in st.session_state['portfolio'].columns
|
| 1701 |
if col not in ['salary', 'median', 'Own']]
|
|
|
|
| 1703 |
for col in player_columns:
|
| 1704 |
st.session_state['portfolio'][col] = st.session_state['portfolio'][col].map(lambda x: portfolio_match_dict.get(x, x))
|
| 1705 |
|
| 1706 |
+
projections_match_dict, unmatched_proj_names = chunk_name_matching(projection_names, csv_names, ignore_rename)
|
| 1707 |
|
| 1708 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict.get(x, x))
|
| 1709 |
st.session_state['projections_df'] = projections
|
|
|
|
| 1711 |
projections_names = st.session_state['projections_df']['player_names'].tolist()
|
| 1712 |
portfolio_names = get_portfolio_names(st.session_state['portfolio'])
|
| 1713 |
|
| 1714 |
+
projections_match_dict2, unmatched_proj_names2 = chunk_name_matching(projection_names, portfolio_names, ignore_rename)
|
| 1715 |
|
| 1716 |
projections['player_names'] = projections['player_names'].map(lambda x: projections_match_dict2.get(x, x))
|
| 1717 |
st.session_state['projections_df'] = projections
|