| import streamlit as st |
| import numpy as np |
| import pandas as pd |
| from fuzzywuzzy import process |
|
|
def find_csv_mismatches(csv_df, projections_df):
    """Reconcile CSV player names against the projections player names.

    Names found in ``csv_df['Name']`` but absent from
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names. A match scoring 100 is applied automatically; every
    other name is queued in ``st.session_state`` and presented to the user
    one at a time through a radio selector and a confirm button.

    Args:
        csv_df: DataFrame expected to contain a ``'Name'`` column.
        projections_df: DataFrame expected to contain a ``'player_names'``
            column.

    Returns:
        A copy of ``csv_df`` with the renames applied during this run. If a
        required column is missing, an error is shown and the unmodified
        copy is returned.

    Side effects:
        Reads and writes ``st.session_state.csv_current_player_index`` and
        ``st.session_state.csv_players_to_process``; on a confirmed manual
        match stores the updated frame in ``st.session_state['csv_file']``;
        calls ``st.rerun()`` after every confirmation.
    """
    csv_df = csv_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df

    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].unique())

    # Fuzzy-match candidates must be the projection names (the rename
    # targets). Only strings are kept because the matcher cannot process
    # NaN or other non-string choices.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # Names present in the CSV but absent from projections. key=str keeps
    # the ordering deterministic even if non-string values are present.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(
            player, projection_players_list, limit=1
        )
        # Guard against an empty candidate list before indexing the result.
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(
                f"Automatically matched '{player}' with '{match_name}' "
                f"(100% match)"
            )
        else:
            players_to_process.append(player)

    # The review queue is initialised only once; later reruns keep walking
    # the stored list through the stored index.
    if 'csv_current_player_index' not in st.session_state:
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All CSV players found in projections!")
        return csv_df

    st.warning("Players in CSV but missing from projections")

    queue = st.session_state.csv_players_to_process
    index = st.session_state.csv_current_player_index

    remaining_players = queue[index:]
    st.info(
        f"Remaining players to process ({len(remaining_players)}):\n"
        + "\n".join(f"- {player}" for player in remaining_players)
    )

    if index >= len(queue):
        st.success("All players have been processed!")
        # Reset the queue state once every queued player has been handled.
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = []
        return csv_df

    current_player = queue[index]
    closest_matches = process.extract(
        current_player, projection_players_list, limit=3
    )

    st.write(
        f"**Missing Player {index + 1} of {len(queue)}:** {current_player}"
    )

    # Map each radio label back to its candidate name so the selection does
    # not have to be parsed out of the label text (names may contain " (").
    no_match_label = "None of these"
    label_to_name = {
        f"{name} ({score}%)": name for name, score in closest_matches
    }
    options = list(label_to_name)
    options.append(no_match_label)

    selected_option = st.radio(
        "Select correct match:",
        options,
        key=f"csv_radio_{current_player}",
    )

    if st.button("Confirm Selection", key="csv_confirm"):
        if selected_option != no_match_label:
            selected_name = label_to_name[selected_option]
            csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
            st.success(f"Replaced '{current_player}' with '{selected_name}'")
            st.session_state['csv_file'] = csv_df

        # Advance to the next queued player and restart the script.
        st.session_state.csv_current_player_index += 1
        st.rerun()

    return csv_df