joycecast committed on
Commit
d98c784
·
verified ·
1 Parent(s): 8ad3217

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -32
app.py CHANGED
@@ -121,6 +121,9 @@ def load_and_validate_excel(file_contents_list, file_names_list, keywords_hash):
121
  if dedup_cols:
122
  combined_df = combined_df.drop_duplicates(subset=dedup_cols, keep='first')
123
 
 
 
 
124
  dedup_count = original_count - len(combined_df)
125
  return combined_df, original_count, dedup_count
126
 
@@ -345,43 +348,44 @@ with tab1:
345
  )
346
  ]
347
 
348
- st.write(f"**Showing {len(filtered_df)} of {len(df)} entries**")
349
 
350
- # Display filtered data
351
  if len(filtered_df) > 0:
352
- # Show key columns
353
- display_cols = ["Entry Number", "Description", "Tariff",
354
- "Primary 1", "Primary 2", "Primary 3",
355
- "Primary 4", "Primary 5", "Primary 6"]
356
- display_cols = [c for c in display_cols if c in filtered_df.columns]
357
-
358
- st.dataframe(
359
- filtered_df[display_cols].head(100),
360
- use_container_width=True
361
  )
362
 
363
- if len(filtered_df) > 100:
364
- st.info("Showing first 100 rows.")
 
 
 
365
 
366
- # Run validation ONCE on full dataset (cached), then filter results
367
- file_names_key = ",".join(sorted(file_names))
368
- if "cached_full_results" not in st.session_state or st.session_state.get("cached_file_namess") != file_names_key:
369
- with st.spinner("Validating all entries (one-time)..."):
370
- validator = get_validator()
371
- full_results = validate_dataframe(df, validator)
372
- full_results_df = results_to_dataframe(full_results)
373
- st.session_state.cached_full_results = full_results_df
374
- st.session_state.cached_file_namess = file_names_key
375
-
376
- # Filter cached results based on current filters
377
- full_results_df = st.session_state.cached_full_results
378
- filtered_indices = filtered_df.index.tolist()
379
- filtered_results_df = full_results_df.iloc[filtered_indices].copy()
380
-
381
- st.session_state.validation_results = filtered_results_df
382
- st.session_state.filtered_df = filtered_df
383
-
384
- st.success(f"Showing {len(filtered_df)} entries. Go to 'Validation Results' tab to review.")
 
385
 
386
  except Exception as e:
387
  st.error(f"Error loading file: {str(e)}")
 
121
  if dedup_cols:
122
  combined_df = combined_df.drop_duplicates(subset=dedup_cols, keep='first')
123
 
124
+ # Reset index to ensure sequential indices after deduplication
125
+ combined_df = combined_df.reset_index(drop=True)
126
+
127
  dedup_count = original_count - len(combined_df)
128
  return combined_df, original_count, dedup_count
129
 
 
348
  )
349
  ]
350
 
351
+ st.write(f"**{len(filtered_df)} of {len(df)} entries after filters**")
352
 
 
353
  if len(filtered_df) > 0:
354
+ # Manual validation button
355
+ file_names_key = ",".join(sorted(file_names))
356
+
357
+ # Check if validation already done for these files
358
+ validation_done = (
359
+ "cached_full_results" in st.session_state and
360
+ st.session_state.get("cached_file_names") == file_names_key
 
 
361
  )
362
 
363
+ if validation_done:
364
+ # Filter cached results based on current filters
365
+ full_results_df = st.session_state.cached_full_results
366
+ filtered_indices = filtered_df.index.tolist()
367
+ filtered_results_df = full_results_df.iloc[filtered_indices].copy()
368
 
369
+ st.session_state.validation_results = filtered_results_df
370
+ st.session_state.filtered_df = filtered_df
371
+
372
+ st.success(f"Validated {len(filtered_df)} entries. Go to 'Validation Results' tab to review.")
373
+ else:
374
+ st.session_state.filtered_df = filtered_df
375
+
376
+ if st.button("Validate", type="primary"):
377
+ with st.spinner("Validating all entries..."):
378
+ validator = get_validator()
379
+ full_results = validate_dataframe(df, validator)
380
+ full_results_df = results_to_dataframe(full_results)
381
+ st.session_state.cached_full_results = full_results_df
382
+ st.session_state.cached_file_names = file_names_key
383
+
384
+ filtered_indices = filtered_df.index.tolist()
385
+ filtered_results_df = full_results_df.iloc[filtered_indices].copy()
386
+ st.session_state.validation_results = filtered_results_df
387
+
388
+ st.rerun()
389
 
390
  except Exception as e:
391
  st.error(f"Error loading file: {str(e)}")