"""Helper functions for the quickstart walkthrough in the redaction app."""

import os

import gradio as gr
import pandas as pd

from tools.config import (
    AWS_LLM_PII_OPTION,
    AWS_PII_OPTION,
    CHOSEN_COMPREHEND_ENTITIES,
    CHOSEN_LLM_ENTITIES,
    CHOSEN_REDACT_ENTITIES,
    DEFAULT_PII_DETECTION_MODEL,
    INFERENCE_SERVER_PII_OPTION,
    LOCAL_PII_OPTION,
    LOCAL_TRANSFORMERS_LLM_PII_OPTION,
    NO_REDACTION_PII_OPTION,
    SHOW_AWS_TEXT_EXTRACTION_OPTIONS,
    SHOW_INFERENCE_SERVER_VLM_MODEL_OPTIONS,
    SHOW_OCR_GUI_OPTIONS,
    SHOW_PII_IDENTIFICATION_OPTIONS,
    TESSERACT_TEXT_EXTRACT_OPTION,
    TEXTRACT_TEXT_EXTRACT_OPTION,
)
from tools.helper_functions import put_columns_in_df

# File extensions that are routed to the "data file" (tabular / Word) workflow.
_DATA_FILE_EXTENSIONS = frozenset({".xlsx", ".xls", ".csv", ".parquet", ".docx"})

# Placeholder entries that mean "the user has not chosen anything yet".
_COLNAMES_PLACEHOLDER = "Choose columns to anonymise"
_SHEETS_PLACEHOLDER = "Choose Excel sheets to anonymise"

# Labels of the general redaction method selector.
_REDACT_ALL_PII = "Redact all PII"
_REDACT_SELECTED_TERMS = "Redact selected terms"
_EXTRACT_TEXT_ONLY = "Extract text only"


def _file_extension(file):
    """Return the lower-cased extension of a path string or a file-like object with ``.name``."""
    file_path = file.name if hasattr(file, "name") else str(file)
    return os.path.splitext(file_path)[1].lower()


def _has_user_selection(selection, placeholder):
    """Return True when ``selection`` is non-empty and its first entry is not ``placeholder``."""
    return bool(selection) and selection[0] != placeholder


def _strip_or_default(value, default):
    """Strip string input and substitute ``default`` for ``None`` or an empty string.

    Gradio can deliver ``None`` or whitespace when ``.change()`` fires before the
    component value has synced, so handlers normalise before comparing.
    """
    if isinstance(value, str):
        value = value.strip()
    if value is None or value == "":
        return default
    return value


def _is_llm_pii_method(pii_method):
    """Return True when ``pii_method`` is one of the LLM-based PII detection options."""
    return (
        pii_method == LOCAL_TRANSFORMERS_LLM_PII_OPTION
        or pii_method == INFERENCE_SERVER_PII_OPTION
        or pii_method == AWS_LLM_PII_OPTION
    )


def _pii_entity_visibility(pii_method):
    """Return ``(show_local, show_comprehend, show_llm)`` flags for a PII method.

    A ``None`` or blank method defaults to showing the local entity selector so
    that the UI never ends up with every selector hidden.
    """
    if pii_method is None or (isinstance(pii_method, str) and not pii_method.strip()):
        return True, False, False
    return (
        pii_method == LOCAL_PII_OPTION,
        pii_method == AWS_PII_OPTION,
        _is_llm_pii_method(pii_method),
    )


def _entity_list_or_custom(configured_entities):
    """Return the configured entity list, or ``["CUSTOM"]`` if it is not a list."""
    if isinstance(configured_entities, list):
        return configured_entities
    return ["CUSTOM"]


def _component_with_value(component_cls, value, *, require_truthy=False):
    """Build ``component_cls(value=value)``, or a bare ``component_cls()`` when there is no value.

    Args:
        component_cls: Gradio component class to instantiate.
        value: Value taken from the walkthrough component.
        require_truthy: When True, falsy values (not just ``None``) count as
            "no value" and produce a bare component.
    """
    has_value = bool(value) if require_truthy else value is not None
    return component_cls(value=value) if has_value else component_cls()


def _list_dropdown_update(list_val):
    """Convert a deny/allow/fully-redacted list value into a ``gr.Dropdown`` update.

    Accepts a list or (for backward compatibility) a DataFrame whose first
    column holds the terms. Items are converted to strings and falsy items are
    dropped. An empty or missing value yields a bare ``gr.Dropdown()``.
    """
    if list_val is None:
        return gr.Dropdown()
    if isinstance(list_val, pd.DataFrame):
        # Drop NaN cells so they do not become the literal term "nan".
        list_val = (
            list_val.iloc[:, 0].dropna().tolist() if not list_val.empty else []
        )
    if isinstance(list_val, list):
        # Dropdown components expect string items.
        list_val = [str(item) for item in list_val if item]
    return gr.Dropdown(value=list_val) if list_val else gr.Dropdown()


def is_data_file_type_walkthrough(files):
    """Check if any of the files is a data file type (xlsx, xls, csv, parquet, docx)."""
    if not files:
        return False
    return any(
        _file_extension(file) in _DATA_FILE_EXTENSIONS for file in files if file
    )


def route_walkthrough_files(files):
    """Route files from walkthrough to appropriate component and determine if data file.

    Also returns visibility updates for step 2 text extraction components: when
    the upload contains a data file, those are hidden; when it is a document,
    the radio follows SHOW_OCR_GUI_OPTIONS and the accordions are left unchanged.

    Returns:
        Tuple of (document files or None, data files or None, is-data-file flag,
        walkthrough update selecting step 2, and three text extraction
        visibility updates).
    """
    if not files:
        return (
            None,
            None,
            False,
            gr.Walkthrough(selected=2),
            gr.update(visible=SHOW_OCR_GUI_OPTIONS),
            gr.update(),
            gr.update(),
        )

    doc_files = []
    data_files = []
    for file in files:
        if not file:
            continue
        if _file_extension(file) in _DATA_FILE_EXTENSIONS:
            data_files.append(file)
        else:
            doc_files.append(file)

    # Any data file in the upload switches the whole walkthrough to data mode.
    is_data = bool(data_files)

    if is_data:
        # Hide text extraction options on step 2 when a data file was uploaded.
        text_extract_updates = (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return None, data_files, True, gr.Walkthrough(selected=2), *text_extract_updates

    # Document: show radio if enabled; leave accordions unchanged (they follow
    # the radio selection).
    text_extract_updates = (
        gr.update(visible=SHOW_OCR_GUI_OPTIONS),
        gr.update(),
        gr.update(),
    )
    return doc_files, None, False, gr.Walkthrough(selected=2), *text_extract_updates


def handle_step_2_next(
    files,
    is_data_file,
    walkthrough_colnames_val,
    walkthrough_excel_sheets_val,
    text_extract_method_val,
):
    """Handle step 2 next button - populate dropdowns if data files and sync with main components.

    Returns:
        Tuple of updates for walkthrough_colnames, walkthrough_excel_sheets,
        in_colnames, in_excel_sheets, walkthrough_text_extract_method_radio and
        the walkthrough (advanced to step 3).

    Note: ``text_extract_method_val`` is accepted for the event signature but is
    not used here. Visibility of the local OCR radio / handwriting checkbox and
    of the Step 3 PII components is controlled by their own event handlers.
    """
    # Show text extraction method radio in Step 2 if SHOW_OCR_GUI_OPTIONS is True.
    show_text_extract_method = SHOW_OCR_GUI_OPTIONS

    if not (is_data_file and files):
        return (
            gr.Dropdown(visible=False),  # walkthrough_colnames
            gr.Dropdown(visible=False),  # walkthrough_excel_sheets
            gr.Dropdown(),  # in_colnames (no change)
            gr.Dropdown(visible=False),  # in_excel_sheets (hidden)
            gr.Radio(
                visible=show_text_extract_method
            ),  # walkthrough_text_extract_method_radio
            gr.Walkthrough(selected=3),  # walkthrough
        )

    # Use put_columns_in_df to populate dropdowns.
    colnames_dropdown, excel_sheets_dropdown = put_columns_in_df(files)

    # Preserve the user's selections when they have made one; otherwise fall
    # back to the freshly populated dropdowns. (colnames_dropdown has value=all
    # columns, which would reset a one-column selection to every column.)
    if _has_user_selection(walkthrough_colnames_val, _COLNAMES_PLACEHOLDER):
        main_colnames_update = gr.Dropdown(value=walkthrough_colnames_val)
        walkthrough_colnames_update = gr.update(
            value=walkthrough_colnames_val, visible=True
        )
    else:
        main_colnames_update = colnames_dropdown
        walkthrough_colnames_update = colnames_dropdown

    if _has_user_selection(walkthrough_excel_sheets_val, _SHEETS_PLACEHOLDER):
        main_excel_sheets_update = gr.Dropdown(
            value=walkthrough_excel_sheets_val, visible=True
        )
        walkthrough_excel_sheets_update = gr.update(
            value=walkthrough_excel_sheets_val, visible=True
        )
    else:
        main_excel_sheets_update = excel_sheets_dropdown
        walkthrough_excel_sheets_update = excel_sheets_dropdown

    return (
        walkthrough_colnames_update,  # walkthrough_colnames
        walkthrough_excel_sheets_update,  # walkthrough_excel_sheets
        main_colnames_update,  # in_colnames
        main_excel_sheets_update,  # in_excel_sheets ("Word or Excel/CSV files" tab)
        gr.Radio(
            visible=show_text_extract_method
        ),  # walkthrough_text_extract_method_radio
        gr.Walkthrough(selected=3),  # walkthrough
    )


def _data_files_fingerprint(files):
    """Return a stable key for the current file list to detect redundant updates."""
    if not files:
        return ()
    return tuple(getattr(f, "name", str(f)) for f in files if f is not None)


def update_step_2_on_data_file_upload(files, is_data_file, last_processed_keys=None):
    """Update Step 2 components when data files are uploaded.

    Always returns ``(colnames, sheets, keys)``. When ``last_processed_keys``
    (from gr.State) matches the fingerprint of ``files``, recomputation is
    skipped and no-op updates are returned, to avoid Gradio re-firing change
    and causing an infinite loading loop on the column dropdown.
    """
    keys = _data_files_fingerprint(files)
    if last_processed_keys is not None and keys == last_processed_keys:
        return gr.update(), gr.update(), last_processed_keys

    if is_data_file and files:
        colnames_dropdown, excel_sheets_dropdown = put_columns_in_df(files)
        return colnames_dropdown, excel_sheets_dropdown, keys

    # Not a data file (or nothing uploaded): hide both dropdowns. Every path
    # returns exactly three values so the handler matches its outputs.
    return gr.Dropdown(visible=False), gr.Dropdown(visible=False), keys


def handle_text_extract_method_selection(text_extract_method: str):
    """Handle text extraction method selection in the walkthrough.

    Shows the local OCR radio only if the local OCR option is selected, and
    the AWS Textract settings only if AWS Textract is selected and
    SHOW_AWS_TEXT_EXTRACTION_OPTIONS is enabled.

    Args:
        text_extract_method: Selected text extraction method

    Returns:
        Tuple of visibility updates for local OCR radio, and AWS Textract settings
    """
    # Default to the local OCR option so something stays visible.
    text_extract_method = _strip_or_default(
        text_extract_method, TESSERACT_TEXT_EXTRACT_OPTION
    )

    show_local_ocr = text_extract_method == TESSERACT_TEXT_EXTRACT_OPTION
    show_aws_textract = (
        text_extract_method == TEXTRACT_TEXT_EXTRACT_OPTION
        and SHOW_AWS_TEXT_EXTRACTION_OPTIONS
    )

    return (
        gr.update(visible=show_local_ocr),  # walkthrough_local_ocr_method_radio
        gr.update(
            visible=show_aws_textract
        ),  # walkthrough_handwrite_signature_checkbox
    )


def handle_redaction_method_selection(redaction_method: str, pii_method: str):
    """Handle redaction method selection in Step 3 - show appropriate components based on selection.

    Args:
        redaction_method: Selected general redaction method. ``None``/blank is
            treated as "Redact all PII".
        pii_method: Currently selected PII detection method. ``None``/blank is
            treated as DEFAULT_PII_DETECTION_MODEL.

    Returns:
        Tuple of seven updates: PII method dropdown, local entities,
        comprehend entities, LLM entities accordion, LLM entities,
        list accordion, max fuzzy spelling mistakes number.
    """
    redaction_method = _strip_or_default(redaction_method, _REDACT_ALL_PII)
    pii_method = _strip_or_default(pii_method, DEFAULT_PII_DETECTION_MODEL)

    is_redact_all_pii = redaction_method == _REDACT_ALL_PII
    is_redact_selected_terms = redaction_method == _REDACT_SELECTED_TERMS
    is_redact_all_pii_or_selected_terms = is_redact_all_pii or is_redact_selected_terms
    is_extract_text_only = redaction_method == _EXTRACT_TEXT_ONLY

    # When switching from "Extract text only", the PII dropdown may still be
    # NO_REDACTION_PII_OPTION; use DEFAULT_PII_DETECTION_MODEL so exactly one
    # entity dropdown is visible, and also write that value back to the dropdown.
    pii_method_for_visibility = pii_method
    pii_drop_value = None
    if is_redact_all_pii_or_selected_terms and pii_method == NO_REDACTION_PII_OPTION:
        pii_method_for_visibility = DEFAULT_PII_DETECTION_MODEL
        pii_drop_value = DEFAULT_PII_DETECTION_MODEL

    # Both redaction options need a PII detection method to determine what to redact.
    show_pii_method = (
        is_redact_all_pii_or_selected_terms
    ) and SHOW_PII_IDENTIFICATION_OPTIONS

    show_local_entities_init = show_pii_method and (
        pii_method_for_visibility == LOCAL_PII_OPTION
    )
    show_comprehend_entities_init = show_pii_method and (
        pii_method_for_visibility == AWS_PII_OPTION
    )
    is_llm_method_init = show_pii_method and _is_llm_pii_method(
        pii_method_for_visibility
    )

    if is_redact_all_pii_or_selected_terms:
        if is_redact_selected_terms:
            # For "Redact selected terms", only the CUSTOM entity applies.
            local_entities_val = ["CUSTOM"]
            comprehend_entities_val = ["CUSTOM"]
            llm_entities_val = ["CUSTOM"]
        else:
            # For "Redact all PII", use the configured default entities.
            local_entities_val = _entity_list_or_custom(CHOSEN_REDACT_ENTITIES)
            comprehend_entities_val = _entity_list_or_custom(
                CHOSEN_COMPREHEND_ENTITIES
            )
            llm_entities_val = _entity_list_or_custom(CHOSEN_LLM_ENTITIES)

        local_entities_update = gr.Dropdown(
            visible=show_local_entities_init, value=local_entities_val
        )
        comprehend_entities_update = gr.Dropdown(
            visible=show_comprehend_entities_init, value=comprehend_entities_val
        )
        llm_entities_update = gr.Dropdown(
            visible=is_llm_method_init, value=llm_entities_val
        )
        walkthrough_pii_identification_method_drop_update = (
            gr.update(visible=show_pii_method, value=pii_drop_value)
            if pii_drop_value is not None
            else gr.update(visible=show_pii_method)
        )
    else:
        # "Extract text only" or an unrecognised method: update visibility
        # without changing the entity values.
        local_entities_update = gr.Dropdown(visible=show_local_entities_init)
        comprehend_entities_update = gr.Dropdown(visible=show_comprehend_entities_init)
        llm_entities_update = gr.Dropdown(visible=is_llm_method_init)
        if is_extract_text_only:
            walkthrough_pii_identification_method_drop_update = gr.update(
                visible=show_pii_method, value=NO_REDACTION_PII_OPTION
            )
        else:
            # Unknown method: do not force a PII method value.
            walkthrough_pii_identification_method_drop_update = gr.update(
                visible=show_pii_method
            )

    return (
        walkthrough_pii_identification_method_drop_update,  # walkthrough_pii_identification_method_drop
        local_entities_update,  # walkthrough_in_redact_entities
        comprehend_entities_update,  # walkthrough_in_redact_comprehend_entities
        gr.update(visible=is_llm_method_init),  # walkthrough_llm_entities_accordion
        llm_entities_update,  # walkthrough_in_redact_llm_entities
        gr.update(
            visible=is_redact_all_pii_or_selected_terms
        ),  # walkthrough_list_accordion
        gr.update(
            visible=is_redact_all_pii_or_selected_terms
        ),  # walkthrough_max_fuzzy_spelling_mistakes_num
    )


# Update visibility of PII-related components and accordions when general redaction method is selected
def handle_main_redaction_method_selection(redaction_method, pii_method):
    """Wrapper that applies handle_redaction_method_selection and updates accordion visibility.

    handle_redaction_method_selection returns seven items (for the walkthrough):
    pii_drop, local_entities, comprehend_entities, llm_accordion, llm_entities,
    list_accordion, max_fuzzy_num.

    This wrapper returns ten items for the main app: pii_drop, local_entities,
    comprehend_entities, llm_entities, custom_llm_instructions_textbox,
    list_accordion, max_fuzzy_num, entity_types_to_redact_accordion,
    terms_accordion, only_extract_text_radio.

    So we remap: the inner LLM accordion update (index 3) is dropped, the inner
    llm_entities update (index 4) goes to in_redact_llm_entities, and a no-op
    gr.update() is inserted for custom_llm_instructions_textbox (to avoid
    applying the Dropdown value ["CUSTOM"] to the textbox).
    """
    raw = list(handle_redaction_method_selection(redaction_method, pii_method))

    # Normalise exactly as the inner handler does so both agree on which
    # method is active (None/blank counts as "Redact all PII").
    redaction_method = _strip_or_default(redaction_method, _REDACT_ALL_PII)

    is_redact_all_pii = redaction_method == _REDACT_ALL_PII
    is_redact_selected_terms = redaction_method == _REDACT_SELECTED_TERMS
    is_extract_text_only = redaction_method == _EXTRACT_TEXT_ONLY

    show_pii_method = (
        is_redact_all_pii or is_redact_selected_terms
    ) and SHOW_PII_IDENTIFICATION_OPTIONS
    show_selected_terms_lists = is_redact_selected_terms

    results = [
        raw[0],  # pii_identification_method_drop
        raw[1],  # in_redact_entities
        raw[2],  # in_redact_comprehend_entities
        raw[4],  # in_redact_llm_entities
        gr.update(),  # custom_llm_instructions_textbox - leave value unchanged
        raw[5],  # walkthrough_list_accordion
        raw[6],  # max_fuzzy_spelling_mistakes_num
        gr.update(visible=show_pii_method),  # entity_types_to_redact_accordion
        gr.update(visible=show_selected_terms_lists),  # terms_accordion
        # When "Extract text only" is selected, force the
        # "Only extract text (no redaction)" control to True.
        gr.update(value=is_extract_text_only),  # only_extract_text_radio
    ]
    return results


def handle_pii_method_selection(pii_method: str):
    """Handle PII method selection - show appropriate entity dropdowns.

    When the value is None/empty (e.g. the first .change() after loading an
    example sets the component programmatically), defaults to showing the
    local entity selector instead of hiding every selector.

    Returns:
        Tuple of visibility updates for the local entities dropdown, the
        comprehend entities dropdown and the LLM entities accordion.
    """
    show_local_entities, show_comprehend_entities, is_llm_method = (
        _pii_entity_visibility(pii_method)
    )

    # Use gr.update(visible=...) only, to avoid value resets that can trigger
    # change events on the target components and cause loading loops.
    # Components inside the accordion are shown/hidden by the accordion itself.
    return (
        gr.update(visible=show_local_entities),  # walkthrough_in_redact_entities
        gr.update(
            visible=show_comprehend_entities
        ),  # walkthrough_in_redact_comprehend_entities
        gr.update(visible=is_llm_method),  # walkthrough_llm_entities_accordion
    )


def handle_pii_method_selection_tabular(pii_method: str):
    """Handle tabular PII method selection.

    Updates only accordion visibility for the LLM block; leaves
    walkthrough_in_redact_llm_entities and
    walkthrough_custom_llm_instructions_textbox untouched to avoid loading
    spinners hanging on those nested components when switching to an LLM
    method.
    """
    show_local_entities, show_comprehend_entities, is_llm_method = (
        _pii_entity_visibility(pii_method)
    )
    return (
        gr.update(visible=show_local_entities),
        gr.update(visible=show_comprehend_entities),
        gr.update(visible=is_llm_method),  # accordion controls visibility of LLM block
    )


def handle_step_3_next(
    text_extract_method_val,
    local_ocr_method_val,
    handwrite_signature_val,
    pii_method_val,
    redact_entities_val,
    redact_comprehend_entities_val,
    redact_llm_entities_val,
    custom_llm_instructions_val,
    deny_list_val,
    allow_list_val,
    fully_redacted_list_val,
    pii_method_tabular_val,
    anon_strategy_val,
    do_initial_clean_val,
    redact_duplicate_pages_val,
    max_fuzzy_spelling_mistakes_num_val,
):
    """Handle step 3 next button - write walkthrough values to main components.

    Returns:
        Tuple of seventeen updates in the order of the main components listed
        in the return statement; the walkthrough (advanced to step 4) is the
        second-to-last item and max_fuzzy_spelling_mistakes_num is last.
    """
    # Text extraction / OCR / PII method: only written when a truthy value is present.
    text_extract_method_update = _component_with_value(
        gr.Radio, text_extract_method_val, require_truthy=True
    )
    local_ocr_update = _component_with_value(
        gr.Radio, local_ocr_method_val, require_truthy=True
    )
    handwrite_signature_update = _component_with_value(
        gr.CheckboxGroup, handwrite_signature_val, require_truthy=True
    )
    pii_method_update = _component_with_value(
        gr.Radio, pii_method_val, require_truthy=True
    )

    # Entity dropdowns are written even when the selection is an empty list,
    # so that empty selections reach the main components.
    redact_entities_update = _component_with_value(gr.Dropdown, redact_entities_val)
    redact_comprehend_entities_update = _component_with_value(
        gr.Dropdown, redact_comprehend_entities_val
    )
    redact_llm_entities_update = _component_with_value(
        gr.Dropdown, redact_llm_entities_val
    )
    custom_llm_instructions_update = _component_with_value(
        gr.Textbox, custom_llm_instructions_val
    )

    # Deny/allow/fully redacted lists (DataFrame accepted for backward compatibility).
    deny_list_update = _list_dropdown_update(deny_list_val)
    allow_list_update = _list_dropdown_update(allow_list_val)
    fully_redacted_list_update = _list_dropdown_update(fully_redacted_list_val)

    # Tabular data components.
    pii_method_tabular_update = _component_with_value(gr.Radio, pii_method_tabular_val)
    anon_strategy_update = _component_with_value(gr.Radio, anon_strategy_val)
    do_initial_clean_update = _component_with_value(gr.Checkbox, do_initial_clean_val)

    redact_duplicate_pages_update = _component_with_value(
        gr.Checkbox, redact_duplicate_pages_val
    )
    max_fuzzy_spelling_mistakes_num_update = _component_with_value(
        gr.Number, max_fuzzy_spelling_mistakes_num_val
    )

    return (
        text_extract_method_update,  # text_extract_method_radio
        local_ocr_update,  # local_ocr_method_radio
        handwrite_signature_update,  # handwrite_signature_checkbox
        pii_method_update,  # pii_identification_method_drop
        redact_entities_update,  # in_redact_entities
        redact_comprehend_entities_update,  # in_redact_comprehend_entities
        redact_llm_entities_update,  # in_redact_llm_entities
        custom_llm_instructions_update,  # custom_llm_instructions_textbox
        deny_list_update,  # in_deny_list_state
        allow_list_update,  # in_allow_list_state
        fully_redacted_list_update,  # in_fully_redacted_list_state
        pii_method_tabular_update,  # pii_identification_method_drop_tabular
        anon_strategy_update,  # anon_strategy
        do_initial_clean_update,  # do_initial_clean
        redact_duplicate_pages_update,  # redact_duplicate_pages_checkbox
        gr.Walkthrough(selected=4),  # walkthrough
        max_fuzzy_spelling_mistakes_num_update,  # max_fuzzy_spelling_mistakes_num
    )


def handle_step_4_next(
    page_min_val,
    page_max_val,
    textract_output_found_val,
    relevant_ocr_output_with_words_found_val,
    total_pdf_page_count_val,
    estimated_aws_costs_val,
    estimated_time_taken_val,
    cost_code_dataframe_val,
    cost_code_choice_val,
):
    """Handle step 4 next button - write walkthrough values to main components.

    Each value is written to its component unless it is ``None``, in which case
    a bare component (no value change) is returned. The final item advances the
    walkthrough to step 5.
    """
    # Page selection components.
    page_min_update = _component_with_value(gr.Number, page_min_val)
    page_max_update = _component_with_value(gr.Number, page_max_val)

    # Cost-related components.
    textract_output_found_update = _component_with_value(
        gr.Checkbox, textract_output_found_val
    )
    relevant_ocr_output_with_words_found_update = _component_with_value(
        gr.Checkbox, relevant_ocr_output_with_words_found_val
    )
    total_pdf_page_count_update = _component_with_value(
        gr.Number, total_pdf_page_count_val
    )
    estimated_aws_costs_update = _component_with_value(
        gr.Number, estimated_aws_costs_val
    )
    estimated_time_taken_update = _component_with_value(
        gr.Number, estimated_time_taken_val
    )

    # Cost code components.
    cost_code_dataframe_update = _component_with_value(
        gr.Dataframe, cost_code_dataframe_val
    )
    cost_code_choice_update = _component_with_value(gr.Dropdown, cost_code_choice_val)

    return (
        page_min_update,  # page_min
        page_max_update,  # page_max
        textract_output_found_update,  # textract_output_found_checkbox
        relevant_ocr_output_with_words_found_update,  # relevant_ocr_output_with_words_found_checkbox
        total_pdf_page_count_update,  # total_pdf_page_count
        estimated_aws_costs_update,  # estimated_aws_costs_number
        estimated_time_taken_update,  # estimated_time_taken_number
        cost_code_dataframe_update,  # cost_code_dataframe
        cost_code_choice_update,  # cost_code_choice_drop
        gr.Walkthrough(selected=5),  # walkthrough
    )


def sync_walkthrough_outputs_to_original(summary_text, output_file_value):
    """Sync walkthrough output components to original components.

    Duplicates the outputs of the redaction process to both the walkthrough
    and the original output components.

    Args:
        summary_text: The output summary text
        output_file_value: The output file value

    Returns:
        Tuple of (walkthrough_summary, walkthrough_file, original_summary, original_file)
    """
    return (
        summary_text,  # walkthrough_redaction_output_summary_textbox
        output_file_value,  # walkthrough_output_file
        summary_text,  # redaction_output_summary_textbox (original)
        output_file_value,  # output_file (original)
    )


def sync_walkthrough_tabular_outputs_to_original(summary_text, output_file_value):
    """Sync walkthrough tabular output components to original components.

    Duplicates the outputs of the tabular redaction process to both the
    walkthrough and the original output components.

    Args:
        summary_text: The output summary text
        output_file_value: The output file value

    Returns:
        Tuple of (walkthrough_summary, walkthrough_file, original_summary, original_file)
    """
    return (
        summary_text,  # walkthrough_text_output_summary
        output_file_value,  # walkthrough_text_output_file
        summary_text,  # text_output_summary (original)
        output_file_value,  # text_output_file (original)
    )


def update_step_3_tabular_visibility(is_data_file):
    """Update visibility of Step 3 components based on file type.

    When a data file is chosen: show tabular options, hide document-only
    options. When a document is chosen: show document options (PII method,
    duplicate pages, etc.), hide tabular options.

    Args:
        is_data_file: Boolean indicating if uploaded file is a data file

    Returns:
        Tuple of visibility updates for document-only and tabular components
    """
    show_doc = not is_data_file
    return (
        gr.update(visible=show_doc),  # walkthrough_local_ocr_method_radio
        gr.update(visible=show_doc),  # walkthrough_pii_identification_method_drop
        gr.update(visible=show_doc),  # walkthrough_fully_redacted_list_state
        gr.update(visible=show_doc),  # walkthrough_redact_duplicate_pages_checkbox
        gr.update(
            visible=is_data_file
        ),  # walkthrough_pii_identification_method_drop_tabular
        gr.update(visible=is_data_file),  # walkthrough_anon_strategy
        gr.update(visible=is_data_file),  # walkthrough_do_initial_clean
    )


def update_step_4_visibility(is_data_file):
    """Update visibility of Step 4 components based on file type.

    Args:
        is_data_file: Boolean indicating if uploaded file is a data file

    Returns:
        Tuple of visibility updates for the document redact button and the
        tabular redact button
    """
    return (
        gr.update(visible=not is_data_file),  # step_4_next_document_redact_btn
        gr.update(visible=is_data_file),  # step_4_next_tabular_redact_btn
    )


def handle_main_text_extract_method_selection(text_extract_method: str):
    """Handle text extraction method selection for main components.

    Shows the local OCR options only if the local OCR option is selected, and
    the AWS Textract settings only if AWS Textract is selected and
    SHOW_AWS_TEXT_EXTRACTION_OPTIONS is enabled.

    Args:
        text_extract_method: Selected text extraction method

    Returns:
        Tuple of visibility updates for local OCR accordion, inference server
        accordion, and AWS Textract accordion
    """
    # Default to the local OCR option so something stays visible.
    text_extract_method = _strip_or_default(
        text_extract_method, TESSERACT_TEXT_EXTRACT_OPTION
    )

    show_local_ocr = text_extract_method == TESSERACT_TEXT_EXTRACT_OPTION
    show_aws_textract = (
        text_extract_method == TEXTRACT_TEXT_EXTRACT_OPTION
        and SHOW_AWS_TEXT_EXTRACTION_OPTIONS
    )
    # Inference server VLM model options are only relevant for local OCR and
    # only when that option is enabled in config.
    show_inference_server = (
        text_extract_method == TESSERACT_TEXT_EXTRACT_OPTION
        and SHOW_INFERENCE_SERVER_VLM_MODEL_OPTIONS
    )

    return (
        gr.update(visible=show_local_ocr),  # local_ocr_method_accordion
        gr.update(
            visible=show_inference_server
        ),  # inference_server_vlm_model_accordion
        gr.update(visible=show_aws_textract),  # aws_textract_signature_accordion
    )


def handle_main_pii_method_selection(pii_method):
    """Handle PII method selection for main components.

    Shows the appropriate entity sections and hides all of them if
    NO_REDACTION_PII_OPTION is selected.

    Args:
        pii_method: Selected PII detection method

    Returns:
        Tuple of four visibility updates: local entities, comprehend entities,
        LLM entities, and LLM custom instructions
    """
    # Normalize string (Gradio can send whitespace).
    if isinstance(pii_method, str):
        pii_method = pii_method.strip()

    # When value is None/empty (e.g. .change() fired before the component
    # synced), default to Local so at least one section is visible.
    if pii_method is None or pii_method == "":
        return (
            gr.update(visible=True),  # local_entities
            gr.update(visible=False),  # comprehend_entities
            gr.update(visible=False),  # llm_entities
            gr.update(visible=False),  # llm_custom_instructions
        )

    # If no redaction, hide all PII-related components.
    if pii_method == NO_REDACTION_PII_OPTION:
        return (
            gr.update(visible=False),  # local_entities
            gr.update(visible=False),  # comprehend_entities
            gr.update(visible=False),  # llm_entities
            gr.update(visible=False),  # llm_custom_instructions
        )

    show_local_entities = pii_method == LOCAL_PII_OPTION
    show_comprehend_entities = pii_method == AWS_PII_OPTION
    is_llm_method = _is_llm_pii_method(pii_method)

    return (
        gr.update(visible=show_local_entities),  # local_entities
        gr.update(visible=show_comprehend_entities),  # comprehend_entities
        gr.update(visible=is_llm_method),  # llm_entities
        gr.update(visible=is_llm_method),  # llm_custom_instructions
    )