Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -98,11 +98,7 @@ def load_single_excel(file_content):
|
|
| 98 |
|
| 99 |
@st.cache_data
|
| 100 |
def load_and_validate_excel(file_contents_list, file_names_list, keywords_hash):
|
| 101 |
-
"""Load multiple Excel files and combine - cached to avoid re-running on filter changes
|
| 102 |
-
|
| 103 |
-
Returns:
|
| 104 |
-
tuple: (combined_df, original_count, dedup_count)
|
| 105 |
-
"""
|
| 106 |
all_dfs = []
|
| 107 |
|
| 108 |
for file_content in file_contents_list:
|
|
@@ -111,21 +107,8 @@ def load_and_validate_excel(file_contents_list, file_names_list, keywords_hash):
|
|
| 111 |
|
| 112 |
# Concatenate all dataframes
|
| 113 |
combined_df = pd.concat(all_dfs, ignore_index=True)
|
| 114 |
-
original_count = len(combined_df)
|
| 115 |
-
|
| 116 |
-
# Remove duplicates based on Tariff + Description (after combining all files)
|
| 117 |
-
# Keep first occurrence
|
| 118 |
-
dedup_cols = ["Tariff", "Description"]
|
| 119 |
-
dedup_cols = [c for c in dedup_cols if c in combined_df.columns]
|
| 120 |
-
|
| 121 |
-
if dedup_cols:
|
| 122 |
-
combined_df = combined_df.drop_duplicates(subset=dedup_cols, keep='first')
|
| 123 |
-
|
| 124 |
-
# Reset index to ensure sequential indices after deduplication
|
| 125 |
-
combined_df = combined_df.reset_index(drop=True)
|
| 126 |
|
| 127 |
-
|
| 128 |
-
return combined_df, original_count, dedup_count
|
| 129 |
|
| 130 |
|
| 131 |
@st.cache_data
|
|
@@ -288,16 +271,13 @@ with tab1:
|
|
| 288 |
for f in uploaded_files:
|
| 289 |
f.seek(0)
|
| 290 |
|
| 291 |
-
df
|
| 292 |
|
| 293 |
st.session_state.original_df = df
|
| 294 |
|
| 295 |
# Show load summary
|
| 296 |
if len(uploaded_files) > 1:
|
| 297 |
-
|
| 298 |
-
if dedup_count > 0:
|
| 299 |
-
msg += f" ({dedup_count} duplicates removed)"
|
| 300 |
-
st.success(msg)
|
| 301 |
else:
|
| 302 |
st.success(f"Loaded {len(df)} rows")
|
| 303 |
|
|
|
|
| 98 |
|
| 99 |
@st.cache_data
|
| 100 |
def load_and_validate_excel(file_contents_list, file_names_list, keywords_hash):
|
| 101 |
+
"""Load multiple Excel files and combine - cached to avoid re-running on filter changes"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
all_dfs = []
|
| 103 |
|
| 104 |
for file_content in file_contents_list:
|
|
|
|
| 107 |
|
| 108 |
# Concatenate all dataframes
|
| 109 |
combined_df = pd.concat(all_dfs, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
+
return combined_df
|
|
|
|
| 112 |
|
| 113 |
|
| 114 |
@st.cache_data
|
|
|
|
| 271 |
for f in uploaded_files:
|
| 272 |
f.seek(0)
|
| 273 |
|
| 274 |
+
df = load_and_validate_excel(file_contents, file_names, keywords_hash)
|
| 275 |
|
| 276 |
st.session_state.original_df = df
|
| 277 |
|
| 278 |
# Show load summary
|
| 279 |
if len(uploaded_files) > 1:
|
| 280 |
+
st.success(f"Loaded {len(df)} rows from {len(uploaded_files)} files")
|
|
|
|
|
|
|
|
|
|
| 281 |
else:
|
| 282 |
st.success(f"Loaded {len(df)} rows")
|
| 283 |
|