Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -121,6 +121,9 @@ def load_and_validate_excel(file_contents_list, file_names_list, keywords_hash):
|
|
| 121 |
if dedup_cols:
|
| 122 |
combined_df = combined_df.drop_duplicates(subset=dedup_cols, keep='first')
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
dedup_count = original_count - len(combined_df)
|
| 125 |
return combined_df, original_count, dedup_count
|
| 126 |
|
|
@@ -345,43 +348,44 @@ with tab1:
|
|
| 345 |
)
|
| 346 |
]
|
| 347 |
|
| 348 |
-
st.write(f"**
|
| 349 |
|
| 350 |
-
# Display filtered data
|
| 351 |
if len(filtered_df) > 0:
|
| 352 |
-
#
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
filtered_df[display_cols].head(100),
|
| 360 |
-
use_container_width=True
|
| 361 |
)
|
| 362 |
|
| 363 |
-
if
|
| 364 |
-
|
|
|
|
|
|
|
|
|
|
| 365 |
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
st.
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
|
|
|
| 385 |
|
| 386 |
except Exception as e:
|
| 387 |
st.error(f"Error loading file: {str(e)}")
|
|
|
|
| 121 |
if dedup_cols:
|
| 122 |
combined_df = combined_df.drop_duplicates(subset=dedup_cols, keep='first')
|
| 123 |
|
| 124 |
+
# Reset index to ensure sequential indices after deduplication
|
| 125 |
+
combined_df = combined_df.reset_index(drop=True)
|
| 126 |
+
|
| 127 |
dedup_count = original_count - len(combined_df)
|
| 128 |
return combined_df, original_count, dedup_count
|
| 129 |
|
|
|
|
| 348 |
)
|
| 349 |
]
|
| 350 |
|
| 351 |
+
st.write(f"**{len(filtered_df)} of {len(df)} entries after filters**")
|
| 352 |
|
|
|
|
| 353 |
if len(filtered_df) > 0:
|
| 354 |
+
# Manual validation button
|
| 355 |
+
file_names_key = ",".join(sorted(file_names))
|
| 356 |
+
|
| 357 |
+
# Check if validation already done for these files
|
| 358 |
+
validation_done = (
|
| 359 |
+
"cached_full_results" in st.session_state and
|
| 360 |
+
st.session_state.get("cached_file_names") == file_names_key
|
|
|
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
+
if validation_done:
|
| 364 |
+
# Filter cached results based on current filters
|
| 365 |
+
full_results_df = st.session_state.cached_full_results
|
| 366 |
+
filtered_indices = filtered_df.index.tolist()
|
| 367 |
+
filtered_results_df = full_results_df.iloc[filtered_indices].copy()
|
| 368 |
|
| 369 |
+
st.session_state.validation_results = filtered_results_df
|
| 370 |
+
st.session_state.filtered_df = filtered_df
|
| 371 |
+
|
| 372 |
+
st.success(f"Validated {len(filtered_df)} entries. Go to 'Validation Results' tab to review.")
|
| 373 |
+
else:
|
| 374 |
+
st.session_state.filtered_df = filtered_df
|
| 375 |
+
|
| 376 |
+
if st.button("Validate", type="primary"):
|
| 377 |
+
with st.spinner("Validating all entries..."):
|
| 378 |
+
validator = get_validator()
|
| 379 |
+
full_results = validate_dataframe(df, validator)
|
| 380 |
+
full_results_df = results_to_dataframe(full_results)
|
| 381 |
+
st.session_state.cached_full_results = full_results_df
|
| 382 |
+
st.session_state.cached_file_names = file_names_key
|
| 383 |
+
|
| 384 |
+
filtered_indices = filtered_df.index.tolist()
|
| 385 |
+
filtered_results_df = full_results_df.iloc[filtered_indices].copy()
|
| 386 |
+
st.session_state.validation_results = filtered_results_df
|
| 387 |
+
|
| 388 |
+
st.rerun()
|
| 389 |
|
| 390 |
except Exception as e:
|
| 391 |
st.error(f"Error loading file: {str(e)}")
|