James McCool
committed on
Commit
·
082eda6
1
Parent(s):
b4fbe69
Enhance name matching functionality in app.py and find_name_mismatches.py
Browse files
- Updated the find_name_mismatches function to accept ownership and actual dictionaries, allowing for more comprehensive name matching.
- Modified app.py to handle the additional outputs from the updated function, ensuring proper session state management for ownership and actual data.
- Improved data integrity by copying ownership and actual dictionaries before modifications, maintaining original data consistency.
- app.py +1 -1
- global_func/find_name_mismatches.py +21 -2
app.py
CHANGED
|
@@ -79,7 +79,7 @@ with tab1:
|
|
| 79 |
st.session_state['projections_df'] = projections.copy()
|
| 80 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
| 81 |
# Run name matching only once when first loading the files
|
| 82 |
-
st.session_state['Contest'], st.session_state['projections_df'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'])
|
| 83 |
|
| 84 |
with tab2:
|
| 85 |
if st.button('Clear data', key='reset3'):
|
|
|
|
| 79 |
st.session_state['projections_df'] = projections.copy()
|
| 80 |
st.session_state['projections_df']['salary'] = (st.session_state['projections_df']['salary'].astype(str).str.replace(',', '').astype(float).astype(int))
|
| 81 |
# Run name matching only once when first loading the files
|
| 82 |
+
st.session_state['Contest'], st.session_state['projections_df'], st.session_state['ownership_dict'], st.session_state['actual_dict'] = find_name_mismatches(st.session_state['Contest'], st.session_state['projections_df'], st.session_state['ownership_dict'], st.session_state['actual_dict'])
|
| 83 |
|
| 84 |
with tab2:
|
| 85 |
if st.button('Clear data', key='reset3'):
|
global_func/find_name_mismatches.py
CHANGED
|
@@ -4,10 +4,12 @@ import pandas as pd
|
|
| 4 |
import time
|
| 5 |
from fuzzywuzzy import process
|
| 6 |
|
| 7 |
-
def find_name_mismatches(contest_df, projections_df):
|
| 8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
| 9 |
projections_df = projections_df.copy()
|
| 10 |
contest_df = contest_df.copy()
|
|
|
|
|
|
|
| 11 |
|
| 12 |
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
| 13 |
|
|
@@ -79,12 +81,23 @@ def find_name_mismatches(contest_df, projections_df):
|
|
| 79 |
for col in name_columns:
|
| 80 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Process manual selections
|
| 83 |
for projection_name, selection in selections.items():
|
| 84 |
if selection != "None of these":
|
| 85 |
selected_name = selection.split(" (")[0]
|
| 86 |
for col in name_columns:
|
| 87 |
contest_df[col] = contest_df[col].replace(selected_name, projection_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
st.success(f"Replaced '{selected_name}' with '{projection_name}'")
|
| 89 |
|
| 90 |
st.success("All changes applied successfully!")
|
|
@@ -98,4 +111,10 @@ def find_name_mismatches(contest_df, projections_df):
|
|
| 98 |
for projection_name, contest_name in auto_matches.items():
|
| 99 |
for col in name_columns:
|
| 100 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import time
|
| 5 |
from fuzzywuzzy import process
|
| 6 |
|
| 7 |
+
def find_name_mismatches(contest_df, projections_df, ownership_dict, fpts_dict):
|
| 8 |
# Create a copy of the projections dataframe to avoid modifying the original
|
| 9 |
projections_df = projections_df.copy()
|
| 10 |
contest_df = contest_df.copy()
|
| 11 |
+
ownership_dict = ownership_dict.copy()
|
| 12 |
+
fpts_dict = fpts_dict.copy()
|
| 13 |
|
| 14 |
name_columns = [col for col in contest_df.columns if not col in ['BaseName', 'EntryCount']]
|
| 15 |
|
|
|
|
| 81 |
for col in name_columns:
|
| 82 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
| 83 |
|
| 84 |
+
if contest_name in ownership_dict:
|
| 85 |
+
ownership_dict[projection_name] = ownership_dict.pop(contest_name)
|
| 86 |
+
if contest_name in fpts_dict:
|
| 87 |
+
fpts_dict[projection_name] = fpts_dict.pop(contest_name)
|
| 88 |
+
|
| 89 |
# Process manual selections
|
| 90 |
for projection_name, selection in selections.items():
|
| 91 |
if selection != "None of these":
|
| 92 |
selected_name = selection.split(" (")[0]
|
| 93 |
for col in name_columns:
|
| 94 |
contest_df[col] = contest_df[col].replace(selected_name, projection_name)
|
| 95 |
+
|
| 96 |
+
if selected_name in ownership_dict:
|
| 97 |
+
ownership_dict[projection_name] = ownership_dict.pop(selected_name)
|
| 98 |
+
if selected_name in fpts_dict:
|
| 99 |
+
fpts_dict[projection_name] = fpts_dict.pop(selected_name)
|
| 100 |
+
|
| 101 |
st.success(f"Replaced '{selected_name}' with '{projection_name}'")
|
| 102 |
|
| 103 |
st.success("All changes applied successfully!")
|
|
|
|
| 111 |
for projection_name, contest_name in auto_matches.items():
|
| 112 |
for col in name_columns:
|
| 113 |
contest_df[col] = contest_df[col].replace(contest_name, projection_name)
|
| 114 |
+
|
| 115 |
+
if contest_name in ownership_dict:
|
| 116 |
+
ownership_dict[projection_name] = ownership_dict.pop(contest_name)
|
| 117 |
+
if contest_name in fpts_dict:
|
| 118 |
+
fpts_dict[projection_name] = fpts_dict.pop(contest_name)
|
| 119 |
+
|
| 120 |
+
return contest_df, projections_df, ownership_dict, fpts_dict
|