James McCool
committed on
Commit
·
795a6d7
1
Parent(s):
894a694
Refactor load_contest_file function to streamline data processing
Browse files
- Removed redundant position handling logic and improved the selection of essential columns for clarity.
- Introduced separate dataframes for player attributes, enhancing data organization and accessibility.
- Updated comments to reflect changes and improve code readability, contributing to ongoing efforts to enhance data handling and user experience.
- global_func/load_contest_file.py +8 -136
global_func/load_contest_file.py
CHANGED
|
@@ -2,11 +2,9 @@ import streamlit as st
|
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
def load_contest_file(upload, sport):
|
| 5 |
-
pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
|
| 6 |
if upload is not None:
|
| 7 |
try:
|
| 8 |
try:
|
| 9 |
-
|
| 10 |
if upload.name.endswith('.csv'):
|
| 11 |
raw_df = pd.read_csv(upload)
|
| 12 |
elif upload.name.endswith(('.xls', '.xlsx')):
|
|
@@ -17,6 +15,7 @@ def load_contest_file(upload, sport):
|
|
| 17 |
except:
|
| 18 |
raw_df = upload
|
| 19 |
|
|
|
|
| 20 |
df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
|
| 21 |
df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
|
| 22 |
|
|
@@ -25,155 +24,28 @@ def load_contest_file(upload, sport):
|
|
| 25 |
df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
|
| 26 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
| 27 |
|
| 28 |
-
#
|
| 29 |
-
# We need to ensure we only replace position indicators that are at the start of a player entry
|
| 30 |
-
# and not those that might appear within player names
|
| 31 |
-
df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
|
| 32 |
-
|
| 33 |
-
# Split into individual columns and remove position indicators
|
| 34 |
-
# First, determine the maximum number of players in any lineup
|
| 35 |
-
max_players = int(df['Lineup'].str.split(',').str.len().max())
|
| 36 |
-
|
| 37 |
-
if max_players <= 0:
|
| 38 |
-
st.error('No valid lineups found in the uploaded file')
|
| 39 |
-
return None
|
| 40 |
-
|
| 41 |
-
# Create columns for each player
|
| 42 |
-
for i in range(1, max_players):
|
| 43 |
-
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
| 44 |
-
# Remove position indicators from the end of each entry
|
| 45 |
-
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
| 46 |
-
# Replace None with -1
|
| 47 |
-
df[i] = df[i].fillna('-1')
|
| 48 |
-
|
| 49 |
-
if sport == 'MLB':
|
| 50 |
-
df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
|
| 51 |
try:
|
| 52 |
df['Own'] = df['Own'].str.replace('%', '').astype(float)
|
| 53 |
except:
|
| 54 |
df['Own'] = df['Own'].astype(float)
|
|
|
|
|
|
|
| 55 |
ownership_df = df[['Player', 'Own']]
|
| 56 |
fpts_df = df[['Player', 'FPTS']]
|
| 57 |
salary_df = df[['Player', 'Salary']]
|
| 58 |
team_df = df[['Player', 'Team']]
|
| 59 |
pos_df = df[['Player', 'Pos']]
|
| 60 |
|
| 61 |
-
# Create
|
| 62 |
-
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
print("\nPosition Dictionary:")
|
| 66 |
-
print(pos_dict)
|
| 67 |
-
|
| 68 |
-
print("\nSample Lineup String:")
|
| 69 |
-
print(df['Lineup'].iloc[0]) # Print first lineup
|
| 70 |
-
|
| 71 |
-
# Function to check if player is eligible for position
|
| 72 |
-
def is_eligible_for_position(player, target_pos):
|
| 73 |
-
if player not in pos_dict:
|
| 74 |
-
print(f"Player not found in pos_dict: {player}")
|
| 75 |
-
return False
|
| 76 |
-
player_positions = pos_dict[player].split('/')
|
| 77 |
-
print(f"Checking {player} for {target_pos}. Player positions: {player_positions}")
|
| 78 |
-
# Handle special cases
|
| 79 |
-
if target_pos.startswith('SP') and 'P' in player_positions:
|
| 80 |
-
return True
|
| 81 |
-
if target_pos.startswith('OF') and 'OF' in player_positions:
|
| 82 |
-
return True
|
| 83 |
-
return target_pos in player_positions
|
| 84 |
-
|
| 85 |
-
# Process each lineup
|
| 86 |
-
for idx, row in df.iterrows():
|
| 87 |
-
print(f"\nProcessing lineup {idx}:")
|
| 88 |
-
print(f"Original lineup string: {row['Lineup']}")
|
| 89 |
-
|
| 90 |
-
# First split by position indicators to preserve player names
|
| 91 |
-
lineup_parts = []
|
| 92 |
-
current_part = row['Lineup']
|
| 93 |
-
for pos in pos_values:
|
| 94 |
-
if pos in current_part:
|
| 95 |
-
parts = current_part.split(pos)
|
| 96 |
-
if len(parts) > 1:
|
| 97 |
-
lineup_parts.append(pos) # Add the position
|
| 98 |
-
current_part = parts[1] # Keep the rest for further processing
|
| 99 |
-
|
| 100 |
-
# Now split the remaining parts by commas, but only if they're not part of a player name
|
| 101 |
-
players = []
|
| 102 |
-
current_position = None
|
| 103 |
-
for part in lineup_parts:
|
| 104 |
-
part = part.strip()
|
| 105 |
-
if part in pos_values:
|
| 106 |
-
current_position = part
|
| 107 |
-
continue
|
| 108 |
-
|
| 109 |
-
# Split by comma only if it's followed by a position indicator
|
| 110 |
-
if ',' in part:
|
| 111 |
-
subparts = part.split(',')
|
| 112 |
-
for subpart in subparts:
|
| 113 |
-
subpart = subpart.strip()
|
| 114 |
-
# Check if this subpart ends with a position
|
| 115 |
-
has_position = any(subpart.endswith(pos) for pos in pos_values)
|
| 116 |
-
if has_position:
|
| 117 |
-
# This is a complete player entry
|
| 118 |
-
for pos in pos_values:
|
| 119 |
-
if subpart.endswith(pos):
|
| 120 |
-
player = subpart[:-len(pos)].strip()
|
| 121 |
-
players.append((current_position, player))
|
| 122 |
-
current_position = pos
|
| 123 |
-
break
|
| 124 |
-
else:
|
| 125 |
-
# This might be part of a player name (like J.P., Crawford)
|
| 126 |
-
# Combine with the next part
|
| 127 |
-
if players:
|
| 128 |
-
last_pos, last_player = players[-1]
|
| 129 |
-
players[-1] = (last_pos, last_player + ',' + subpart)
|
| 130 |
-
else:
|
| 131 |
-
players.append((current_position, subpart))
|
| 132 |
-
else:
|
| 133 |
-
# No comma, just clean and add
|
| 134 |
-
for pos in pos_values:
|
| 135 |
-
if part.endswith(pos):
|
| 136 |
-
player = part[:-len(pos)].strip()
|
| 137 |
-
players.append((current_position, player))
|
| 138 |
-
current_position = pos
|
| 139 |
-
break
|
| 140 |
-
|
| 141 |
-
print(f"Processed players with positions: {players}")
|
| 142 |
-
|
| 143 |
-
# Now fill the positions using the processed players
|
| 144 |
-
cleaned_players = [player for _, player in players]
|
| 145 |
-
|
| 146 |
-
# First pass: fill required positions (excluding OF)
|
| 147 |
-
required_positions = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS']
|
| 148 |
-
for pos in required_positions:
|
| 149 |
-
for position, player in players:
|
| 150 |
-
if is_eligible_for_position(player, pos):
|
| 151 |
-
print(f"Assigning {player} to {pos}")
|
| 152 |
-
df.at[idx, pos] = player
|
| 153 |
-
players.remove((position, player))
|
| 154 |
-
break
|
| 155 |
-
else:
|
| 156 |
-
print(f"No player found for {pos}")
|
| 157 |
-
|
| 158 |
-
# Second pass: fill OF positions with remaining players
|
| 159 |
-
of_positions = ['OF1', 'OF2', 'OF3']
|
| 160 |
-
for pos in of_positions:
|
| 161 |
-
for position, player in players:
|
| 162 |
-
if 'OF' in pos_dict.get(player, '').split('/'):
|
| 163 |
-
print(f"Assigning {player} to {pos}")
|
| 164 |
-
df.at[idx, pos] = player
|
| 165 |
-
players.remove((position, player))
|
| 166 |
-
break
|
| 167 |
-
else:
|
| 168 |
-
print(f"No player found for {pos}, using -1")
|
| 169 |
-
df.at[idx, pos] = '-1'
|
| 170 |
-
|
| 171 |
-
cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS', 'Salary', 'Team'])
|
| 172 |
-
cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']]
|
| 173 |
entry_list = list(set(df['BaseName']))
|
| 174 |
entry_list.sort()
|
| 175 |
|
| 176 |
return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
|
|
|
|
| 177 |
except Exception as e:
|
| 178 |
st.error(f'Error loading file: {str(e)}')
|
| 179 |
return None
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
def load_contest_file(upload, sport):
|
|
|
|
| 5 |
if upload is not None:
|
| 6 |
try:
|
| 7 |
try:
|
|
|
|
| 8 |
if upload.name.endswith('.csv'):
|
| 9 |
raw_df = pd.read_csv(upload)
|
| 10 |
elif upload.name.endswith(('.xls', '.xlsx')):
|
|
|
|
| 15 |
except:
|
| 16 |
raw_df = upload
|
| 17 |
|
| 18 |
+
# Select and rename essential columns
|
| 19 |
df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
|
| 20 |
df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
|
| 21 |
|
|
|
|
| 24 |
df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
|
| 25 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
| 26 |
|
| 27 |
+
# Convert ownership percentage to float
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
try:
|
| 29 |
df['Own'] = df['Own'].str.replace('%', '').astype(float)
|
| 30 |
except:
|
| 31 |
df['Own'] = df['Own'].astype(float)
|
| 32 |
+
|
| 33 |
+
# Create separate dataframes for different player attributes
|
| 34 |
ownership_df = df[['Player', 'Own']]
|
| 35 |
fpts_df = df[['Player', 'FPTS']]
|
| 36 |
salary_df = df[['Player', 'Salary']]
|
| 37 |
team_df = df[['Player', 'Team']]
|
| 38 |
pos_df = df[['Player', 'Pos']]
|
| 39 |
|
| 40 |
+
# Create the cleaned dataframe with just the essential columns
|
| 41 |
+
cleaned_df = df[['BaseName', 'EntryCount', 'Lineup']]
|
| 42 |
|
| 43 |
+
# Get unique entry names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
entry_list = list(set(df['BaseName']))
|
| 45 |
entry_list.sort()
|
| 46 |
|
| 47 |
return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
|
| 48 |
+
|
| 49 |
except Exception as e:
|
| 50 |
st.error(f'Error loading file: {str(e)}')
|
| 51 |
return None
|