Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

James McCool committed on Jun 1, 2025

Commit

795a6d7

1 Parent(s): 894a694

Refactor load_contest_file function to streamline data processing

- Removed redundant position handling logic and improved the selection of essential columns for clarity.
- Introduced separate dataframes for player attributes, enhancing data organization and accessibility.
- Updated comments to reflect changes and improve code readability, contributing to ongoing efforts to enhance data handling and user experience.

Files changed (1) hide show

global_func/load_contest_file.py +8 -136

global_func/load_contest_file.py CHANGED Viewed

@@ -2,11 +2,9 @@ import streamlit as st
 import pandas as pd
 def load_contest_file(upload, sport):
-    pos_values = ['P', 'C', '1B', '2B', '3B', 'SS', 'OF']
     if upload is not None:
         try:
             try:
                 if upload.name.endswith('.csv'):
                     raw_df = pd.read_csv(upload)
                 elif upload.name.endswith(('.xls', '.xlsx')):
@@ -17,6 +15,7 @@ def load_contest_file(upload, sport):
             except:
                 raw_df = upload
             df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
             df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
@@ -25,155 +24,28 @@ def load_contest_file(upload, sport):
             df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
             df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count
-            # Split the lineup string by replacing position indicators with commas
-            # We need to ensure we only replace position indicators that are at the start of a player entry
-            # and not those that might appear within player names
-            df['Lineup'] = df['Lineup'].str.replace(r'\b(' + '|'.join(pos_values) + r')\b', r'\1,', regex=True)
-            # Split into individual columns and remove position indicators
-            # First, determine the maximum number of players in any lineup
-            max_players = int(df['Lineup'].str.split(',').str.len().max())
-            if max_players <= 0:
-                st.error('No valid lineups found in the uploaded file')
-                return None
-            # Create columns for each player
-            for i in range(1, max_players):
-                df[i] = df['Lineup'].str.split(',').str[i].str.strip()
-                # Remove position indicators from the end of each entry
-                df[i] = df[i].str.replace(r'\s+(' + '|'.join(pos_values) + r')$', '', regex=True)
-                # Replace None with -1
-                df[i] = df[i].fillna('-1')
-            if sport == 'MLB':
-                df = df.rename(columns={1: '1B', 2: '2B', 3: '3B', 4: 'C', 5: 'OF1', 6: 'OF2', 7: 'OF3', 8: 'SP1', 9: 'SP2', 10: 'SS'})
             try:
                 df['Own'] = df['Own'].str.replace('%', '').astype(float)
             except:
                 df['Own'] = df['Own'].astype(float)
             ownership_df = df[['Player', 'Own']]
             fpts_df = df[['Player', 'FPTS']]
             salary_df = df[['Player', 'Salary']]
             team_df = df[['Player', 'Team']]
             pos_df = df[['Player', 'Pos']]
-            # Create position mapping dictionary
-            pos_dict = dict(zip(pos_df['Player'], pos_df['Pos']))
-            # Debug prints
-            print("\nPosition Dictionary:")
-            print(pos_dict)
-            print("\nSample Lineup String:")
-            print(df['Lineup'].iloc[0])  # Print first lineup
-            # Function to check if player is eligible for position
-            def is_eligible_for_position(player, target_pos):
-                if player not in pos_dict:
-                    print(f"Player not found in pos_dict: {player}")
-                    return False
-                player_positions = pos_dict[player].split('/')
-                print(f"Checking {player} for {target_pos}. Player positions: {player_positions}")
-                # Handle special cases
-                if target_pos.startswith('SP') and 'P' in player_positions:
-                    return True
-                if target_pos.startswith('OF') and 'OF' in player_positions:
-                    return True
-                return target_pos in player_positions
-            # Process each lineup
-            for idx, row in df.iterrows():
-                print(f"\nProcessing lineup {idx}:")
-                print(f"Original lineup string: {row['Lineup']}")
-                # First split by position indicators to preserve player names
-                lineup_parts = []
-                current_part = row['Lineup']
-                for pos in pos_values:
-                    if pos in current_part:
-                        parts = current_part.split(pos)
-                        if len(parts) > 1:
-                            lineup_parts.append(pos)  # Add the position
-                            current_part = parts[1]  # Keep the rest for further processing
-                # Now split the remaining parts by commas, but only if they're not part of a player name
-                players = []
-                current_position = None
-                for part in lineup_parts:
-                    part = part.strip()
-                    if part in pos_values:
-                        current_position = part
-                        continue
-                    # Split by comma only if it's followed by a position indicator
-                    if ',' in part:
-                        subparts = part.split(',')
-                        for subpart in subparts:
-                            subpart = subpart.strip()
-                            # Check if this subpart ends with a position
-                            has_position = any(subpart.endswith(pos) for pos in pos_values)
-                            if has_position:
-                                # This is a complete player entry
-                                for pos in pos_values:
-                                    if subpart.endswith(pos):
-                                        player = subpart[:-len(pos)].strip()
-                                        players.append((current_position, player))
-                                        current_position = pos
-                                        break
-                            else:
-                                # This might be part of a player name (like J.P., Crawford)
-                                # Combine with the next part
-                                if players:
-                                    last_pos, last_player = players[-1]
-                                    players[-1] = (last_pos, last_player + ',' + subpart)
-                                else:
-                                    players.append((current_position, subpart))
-                    else:
-                        # No comma, just clean and add
-                        for pos in pos_values:
-                            if part.endswith(pos):
-                                player = part[:-len(pos)].strip()
-                                players.append((current_position, player))
-                                current_position = pos
-                                break
-                print(f"Processed players with positions: {players}")
-                # Now fill the positions using the processed players
-                cleaned_players = [player for _, player in players]
-                # First pass: fill required positions (excluding OF)
-                required_positions = ['SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS']
-                for pos in required_positions:
-                    for position, player in players:
-                        if is_eligible_for_position(player, pos):
-                            print(f"Assigning {player} to {pos}")
-                            df.at[idx, pos] = player
-                            players.remove((position, player))
-                            break
-                    else:
-                        print(f"No player found for {pos}")
-                # Second pass: fill OF positions with remaining players
-                of_positions = ['OF1', 'OF2', 'OF3']
-                for pos in of_positions:
-                    for position, player in players:
-                        if 'OF' in pos_dict.get(player, '').split('/'):
-                            print(f"Assigning {player} to {pos}")
-                            df.at[idx, pos] = player
-                            players.remove((position, player))
-                            break
-                    else:
-                        print(f"No player found for {pos}, using -1")
-                        df.at[idx, pos] = '-1'
-            cleaned_df = df.drop(columns=['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Pos', 'Own', 'FPTS', 'Salary', 'Team'])
-            cleaned_df = cleaned_df[['BaseName', 'EntryCount', 'SP1', 'SP2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3']]
             entry_list = list(set(df['BaseName']))
             entry_list.sort()
             return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
         except Exception as e:
             st.error(f'Error loading file: {str(e)}')
             return None

 import pandas as pd
 def load_contest_file(upload, sport):
     if upload is not None:
         try:
             try:
                 if upload.name.endswith('.csv'):
                     raw_df = pd.read_csv(upload)
                 elif upload.name.endswith(('.xls', '.xlsx')):
             except:
                 raw_df = upload
+            # Select and rename essential columns
             df = raw_df[['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS', 'Salary', 'Team']]
             df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
             df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)')
             df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count
+            # Convert ownership percentage to float
             try:
                 df['Own'] = df['Own'].str.replace('%', '').astype(float)
             except:
                 df['Own'] = df['Own'].astype(float)
+            # Create separate dataframes for different player attributes
             ownership_df = df[['Player', 'Own']]
             fpts_df = df[['Player', 'FPTS']]
             salary_df = df[['Player', 'Salary']]
             team_df = df[['Player', 'Team']]
             pos_df = df[['Player', 'Pos']]
+            # Create the cleaned dataframe with just the essential columns
+            cleaned_df = df[['BaseName', 'EntryCount', 'Lineup']]
+            # Get unique entry names
             entry_list = list(set(df['BaseName']))
             entry_list.sort()
             return cleaned_df, ownership_df, fpts_df, salary_df, team_df, pos_df, entry_list
         except Exception as e:
             st.error(f'Error loading file: {str(e)}')
             return None