James McCool
committed on
Commit
·
356c7d4
1
Parent(s):
b9bf803
Refactor contest data handling in `app.py` and `find_name_mismatches.py` for improved functionality
Browse files
- Removed the position dictionary from the data returned by `load_file`, simplifying the data structure.
- Updated the `find_name_mismatches` function to use `contest_df` instead of `portfolio_df`, enhancing clarity in variable naming.
- Adjusted the logic in `app.py` to ensure proper handling of projections and contest data, including the addition of name matching analysis.
- app.py +10 -33
- global_func/find_name_mismatches.py +4 -5
- global_func/load_file.py +1 -2
app.py
CHANGED
|
@@ -31,7 +31,7 @@ with tab1:
|
|
| 31 |
del st.session_state['Contest']
|
| 32 |
|
| 33 |
if Contest_file:
|
| 34 |
-
st.session_state['Contest'], st.session_state['
|
| 35 |
st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all')
|
| 36 |
st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True)
|
| 37 |
if st.session_state['Contest'] is not None:
|
|
@@ -67,39 +67,16 @@ with tab1:
|
|
| 67 |
st.success('Projections file loaded successfully!')
|
| 68 |
st.dataframe(projections.head(10))
|
| 69 |
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
# if csv_file is not None and 'export_dict' not in st.session_state:
|
| 81 |
-
# # Create a dictionary of Name to Name+ID from csv_file
|
| 82 |
-
# try:
|
| 83 |
-
# name_id_map = dict(zip(
|
| 84 |
-
# st.session_state['csv_file']['Name'],
|
| 85 |
-
# st.session_state['csv_file']['Name + ID']
|
| 86 |
-
# ))
|
| 87 |
-
# except:
|
| 88 |
-
# name_id_map = dict(zip(
|
| 89 |
-
# st.session_state['csv_file']['Nickname'],
|
| 90 |
-
# st.session_state['csv_file']['Id']
|
| 91 |
-
# ))
|
| 92 |
-
|
| 93 |
-
# # Function to find best match
|
| 94 |
-
# def find_best_match(name):
|
| 95 |
-
# best_match = process.extractOne(name, name_id_map.keys())
|
| 96 |
-
# if best_match and best_match[1] >= 85: # 85% match threshold
|
| 97 |
-
# return name_id_map[best_match[0]]
|
| 98 |
-
# return name # Return original name if no good match found
|
| 99 |
-
|
| 100 |
-
# # Apply the matching
|
| 101 |
-
# projections['upload_match'] = projections['player_names'].apply(find_best_match)
|
| 102 |
-
# st.session_state['export_dict'] = dict(zip(projections['player_names'], projections['upload_match']))
|
| 103 |
|
| 104 |
with tab2:
|
| 105 |
if st.button('Clear data', key='reset3'):
|
|
|
|
| 31 |
del st.session_state['Contest']
|
| 32 |
|
| 33 |
if Contest_file:
|
| 34 |
+
st.session_state['Contest'], st.session_state['ownership_dict'], st.session_state['entry_list'] = load_file(Contest_file)
|
| 35 |
st.session_state['Contest'] = st.session_state['Contest'].dropna(how='all')
|
| 36 |
st.session_state['Contest'] = st.session_state['Contest'].reset_index(drop=True)
|
| 37 |
if st.session_state['Contest'] is not None:
|
|
|
|
| 67 |
st.success('Projections file loaded successfully!')
|
| 68 |
st.dataframe(projections.head(10))
|
| 69 |
|
| 70 |
+
if Contest_file and projections_file:
|
| 71 |
+
if st.session_state['Contest'] is not None and projections is not None:
|
| 72 |
+
st.subheader("Name Matching Analysis")
|
| 73 |
+
# Initialize projections_df in session state if it doesn't exist
|
| 74 |
+
if 'projections_df' not in st.session_state:
|
| 75 |
+
st.session_state['projections_df'] = projections.copy()
|
| 76 |
+
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
| 77 |
|
| 78 |
+
# Update projections_df with any new matches
|
| 79 |
+
st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
with tab2:
|
| 82 |
if st.button('Clear data', key='reset3'):
|
global_func/find_name_mismatches.py
CHANGED
|
@@ -4,12 +4,11 @@ import pandas as pd
|
|
| 4 |
import time
|
| 5 |
from fuzzywuzzy import process
|
| 6 |
|
| 7 |
-
def find_name_mismatches(portfolio_df, projections_df):
|
| 8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
| 9 |
projections_df = projections_df.copy()
|
| 10 |
|
| 11 |
-
|
| 12 |
-
portfolio_df.columns = range(col_count)
|
| 13 |
|
| 14 |
if 'player_names' not in projections_df.columns:
|
| 15 |
st.error("No 'player_names' column found in projections file")
|
|
@@ -17,8 +16,8 @@ def find_name_mismatches(portfolio_df, projections_df):
|
|
| 17 |
|
| 18 |
# Get unique player names from portfolio and projections
|
| 19 |
portfolio_players = set()
|
| 20 |
-
for col in
|
| 21 |
-
portfolio_players.update(
|
| 22 |
projection_players = set(projections_df['player_names'].unique())
|
| 23 |
projection_players_list = list(projection_players)
|
| 24 |
|
|
|
|
| 4 |
import time
|
| 5 |
from fuzzywuzzy import process
|
| 6 |
|
| 7 |
+
def find_name_mismatches(contest_df, projections_df):
|
| 8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
| 9 |
projections_df = projections_df.copy()
|
| 10 |
|
| 11 |
+
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
|
|
|
| 12 |
|
| 13 |
if 'player_names' not in projections_df.columns:
|
| 14 |
st.error("No 'player_names' column found in projections file")
|
|
|
|
| 16 |
|
| 17 |
# Get unique player names from portfolio and projections
|
| 18 |
portfolio_players = set()
|
| 19 |
+
for col in name_columns:
|
| 20 |
+
portfolio_players.update(contest_df[col].unique())
|
| 21 |
projection_players = set(projections_df['player_names'].unique())
|
| 22 |
projection_players_list = list(projection_players)
|
| 23 |
|
global_func/load_file.py
CHANGED
|
@@ -45,13 +45,12 @@ def load_file(upload):
|
|
| 45 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
| 46 |
# Remove position indicators from the end of each entry
|
| 47 |
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
| 48 |
-
position_dict = dict(zip(df['Player'], df['Pos']))
|
| 49 |
ownership_dict = dict(zip(df['Player'], df['Own']))
|
| 50 |
cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
|
| 51 |
entry_list = list(set(df['BaseName']))
|
| 52 |
entry_list.sort()
|
| 53 |
|
| 54 |
-
return cleaned_df,
|
| 55 |
except Exception as e:
|
| 56 |
st.error(f'Error loading file: {str(e)}')
|
| 57 |
return None
|
|
|
|
| 45 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
| 46 |
# Remove position indicators from the end of each entry
|
| 47 |
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
|
|
|
| 48 |
ownership_dict = dict(zip(df['Player'], df['Own']))
|
| 49 |
cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS'])
|
| 50 |
entry_list = list(set(df['BaseName']))
|
| 51 |
entry_list.sort()
|
| 52 |
|
| 53 |
+
return cleaned_df, ownership_dict, entry_list
|
| 54 |
except Exception as e:
|
| 55 |
st.error(f'Error loading file: {str(e)}')
|
| 56 |
return None
|