James McCool
committed on
Commit
·
0dedc97
1
Parent(s):
3a102e0
Refine lineup string processing in load_contest_file function
Browse files
- Updated regex logic to ensure position indicators are only replaced at the start of player entries and not within player names, enhancing accuracy in lineup formatting.
- Adjusted the removal of position indicators to target only those at the end of each entry, ensuring cleaner data output.
- These changes contribute to ongoing efforts to improve data integrity and user experience within the application.
global_func/load_contest_file.py
CHANGED
|
@@ -26,10 +26,12 @@ def load_contest_file(upload, sport):
|
|
| 26 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
| 27 |
|
| 28 |
# Split the lineup string by replacing position indicators with commas
|
| 29 |
-
#
|
| 30 |
-
|
|
|
|
| 31 |
|
| 32 |
# Split into individual columns and remove position indicators
|
|
|
|
| 33 |
max_players = int(df['Lineup'].str.split(',').str.len().max())
|
| 34 |
|
| 35 |
if max_players <= 0:
|
|
@@ -39,8 +41,8 @@ def load_contest_file(upload, sport):
|
|
| 39 |
# Create columns for each player
|
| 40 |
for i in range(1, max_players):
|
| 41 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
| 42 |
-
#
|
| 43 |
-
df[i] = df[i].str.replace(r'\
|
| 44 |
|
| 45 |
if sport == 'MLB':
|
| 46 |
df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
|
|
|
|
| 26 |
df['EntryCount'] = df['EntryCount'].fillna('1/1') # Default to 1/1 if no entry count
|
| 27 |
|
| 28 |
# Split the lineup string by replacing position indicators with commas
|
| 29 |
+
# We need to ensure we only replace position indicators that are at the start of a player entry
|
| 30 |
+
# and not those that might appear within player names
|
| 31 |
+
df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
|
| 32 |
|
| 33 |
# Split into individual columns and remove position indicators
|
| 34 |
+
# First, determine the maximum number of players in any lineup
|
| 35 |
max_players = int(df['Lineup'].str.split(',').str.len().max())
|
| 36 |
|
| 37 |
if max_players <= 0:
|
|
|
|
| 41 |
# Create columns for each player
|
| 42 |
for i in range(1, max_players):
|
| 43 |
df[i] = df['Lineup'].str.split(',').str[i].str.strip()
|
| 44 |
+
# Remove position indicators from the end of each entry
|
| 45 |
+
df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
|
| 46 |
|
| 47 |
if sport == 'MLB':
|
| 48 |
df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
|