| """Helper functions for the quickstart walkthrough in the redaction app.""" |
|
|
| import os |
|
|
| import gradio as gr |
| import pandas as pd |
|
|
| from tools.config import ( |
| AWS_LLM_PII_OPTION, |
| AWS_PII_OPTION, |
| CHOSEN_COMPREHEND_ENTITIES, |
| CHOSEN_LLM_ENTITIES, |
| CHOSEN_REDACT_ENTITIES, |
| DEFAULT_PII_DETECTION_MODEL, |
| INFERENCE_SERVER_PII_OPTION, |
| LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION, |
| LOCAL_PII_OPTION, |
| LOCAL_TRANSFORMERS_LLM_PII_OPTION, |
| NO_REDACTION_PII_OPTION, |
| SHOW_AWS_TEXT_EXTRACTION_OPTIONS, |
| SHOW_INFERENCE_SERVER_VLM_MODEL_OPTIONS, |
| SHOW_OCR_GUI_OPTIONS, |
| SHOW_PII_IDENTIFICATION_OPTIONS, |
| TEXTRACT_TEXT_EXTRACT_OPTION, |
| ) |
| from tools.helper_functions import put_columns_in_df |
|
|
|
|
def is_data_file_type_walkthrough(files):
    """Report whether any uploaded entry has a data-file extension.

    The extensions treated as data files are .xlsx, .xls, .csv, .parquet and
    .docx (compared case-insensitively).

    Args:
        files: Iterable of uploaded entries. Each entry is either an object
            exposing a ``name`` attribute or something convertible with
            ``str()``. Falsy entries are skipped.

    Returns:
        True if at least one entry has a data-file extension, otherwise False
        (including when ``files`` is empty or None).
    """
    tabular_suffixes = (".xlsx", ".xls", ".csv", ".parquet", ".docx")

    def _suffix_of(entry):
        # Uploaded objects carry their path on ``.name``; anything else is
        # treated as a path-like string.
        path = entry.name if hasattr(entry, "name") else str(entry)
        return os.path.splitext(path)[1].lower()

    return any(
        _suffix_of(entry) in tabular_suffixes
        for entry in (files or ())
        if entry
    )
|
|
|
|
def route_walkthrough_files(files):
    """Split walkthrough uploads into document and data files and build step-2 updates.

    Files whose extension is one of .xlsx, .xls, .csv, .parquet or .docx are
    collected as data files; everything else is collected as document files.
    If at least one data file is present, only the data files are returned
    (the document slot is None), and vice versa.

    Args:
        files: Uploaded entries (objects with a ``name`` attribute or
            path-like values). Falsy entries are ignored.

    Returns:
        A 7-tuple:
            (document files or None,
             data files or None,
             is-data-file flag,
             ``gr.Walkthrough(selected=2)``,
             three updates for the text-extraction components).
        For data files all three text-extraction updates set
        ``visible=False``. Otherwise the first follows SHOW_OCR_GUI_OPTIONS
        and the other two are no-op ``gr.update()`` values.
    """
    # Nothing uploaded: no routing, document-style visibility.
    if not files:
        return (
            None,
            None,
            False,
            gr.Walkthrough(selected=2),
            gr.update(visible=SHOW_OCR_GUI_OPTIONS),
            gr.update(),
            gr.update(),
        )

    is_data = is_data_file_type_walkthrough(files)
    tabular_suffixes = {".xlsx", ".xls", ".csv", ".parquet", ".docx"}

    doc_files, data_files = [], []
    for entry in files:
        if not entry:
            continue
        path = entry.name if hasattr(entry, "name") else str(entry)
        suffix = os.path.splitext(path)[1].lower()
        target = data_files if suffix in tabular_suffixes else doc_files
        target.append(entry)

    if is_data:
        # Tabular uploads never need the text-extraction controls.
        hidden = tuple(gr.update(visible=False) for _ in range(3))
        return (None, data_files, True, gr.Walkthrough(selected=2), *hidden)

    doc_updates = (
        gr.update(visible=SHOW_OCR_GUI_OPTIONS),
        gr.update(),
        gr.update(),
    )
    return (doc_files, None, False, gr.Walkthrough(selected=2), *doc_updates)
|
|
|
|
def handle_step_2_next(
    files,
    is_data_file,
    walkthrough_colnames_val,
    walkthrough_excel_sheets_val,
    text_extract_method_val,
):
    """Build the updates emitted when the step-2 "next" button is pressed.

    For data files the column and sheet dropdowns are populated from
    ``put_columns_in_df(files)``, unless the user already made a real
    selection in the walkthrough, in which case that selection is kept and
    mirrored onto the main components.

    Args:
        files: Uploaded files.
        is_data_file: Whether the upload was classified as a data file.
        walkthrough_colnames_val: Current walkthrough column selection.
        walkthrough_excel_sheets_val: Current walkthrough sheet selection.
        text_extract_method_val: Accepted for interface compatibility; not
            read by this function.

    Returns:
        A 6-tuple: (walkthrough columns update, walkthrough sheets update,
        main columns update, main sheets update, radio whose visibility
        follows SHOW_OCR_GUI_OPTIONS, ``gr.Walkthrough(selected=3)``).
    """
    radio_visible = SHOW_OCR_GUI_OPTIONS

    # Documents (or no files): hide the tabular dropdowns and move on.
    if not (is_data_file and files):
        return (
            gr.Dropdown(visible=False),
            gr.Dropdown(visible=False),
            gr.Dropdown(),
            gr.Dropdown(visible=False),
            gr.Radio(visible=radio_visible),
            gr.Walkthrough(selected=3),
        )

    detected_columns, detected_sheets = put_columns_in_df(files)

    def _has_real_choice(selection, placeholder):
        # A selection counts only if it is non-empty and its first item is
        # not the dropdown's placeholder text.
        return bool(
            selection and len(selection) > 0 and selection[0] != placeholder
        )

    columns_chosen = _has_real_choice(
        walkthrough_colnames_val, "Choose columns to anonymise"
    )
    sheets_chosen = _has_real_choice(
        walkthrough_excel_sheets_val, "Choose Excel sheets to anonymise"
    )

    # Main-app components.
    if columns_chosen:
        main_columns = gr.Dropdown(value=walkthrough_colnames_val)
    else:
        main_columns = detected_columns

    if sheets_chosen:
        main_sheets = gr.Dropdown(value=walkthrough_excel_sheets_val, visible=True)
    else:
        main_sheets = detected_sheets

    # Walkthrough components.
    if columns_chosen:
        walkthrough_columns = gr.update(value=walkthrough_colnames_val, visible=True)
    else:
        walkthrough_columns = detected_columns

    if sheets_chosen:
        walkthrough_sheets = gr.update(
            value=walkthrough_excel_sheets_val, visible=True
        )
    else:
        walkthrough_sheets = detected_sheets

    return (
        walkthrough_columns,
        walkthrough_sheets,
        main_columns,
        main_sheets,
        gr.Radio(visible=radio_visible),
        gr.Walkthrough(selected=3),
    )
|
|
|
|
def _data_files_fingerprint(files):
    """Build a hashable key describing the current file list.

    Args:
        files: Iterable of uploaded entries, or a falsy value.

    Returns:
        A tuple with one item per non-None entry: the entry's ``name``
        attribute when it has one, otherwise ``str(entry)``. An empty tuple
        is returned when ``files`` is falsy.
    """
    if not files:
        return ()

    identifiers = []
    for entry in files:
        if entry is None:
            continue
        # str(entry) is the fallback for entries without a ``name`` attribute.
        identifiers.append(getattr(entry, "name", str(entry)))
    return tuple(identifiers)
|
|
|
|
def update_step_2_on_data_file_upload(files, is_data_file, last_processed_keys=None):
    """Update the Step 2 column/sheet dropdowns when data files are uploaded.

    A fingerprint of the current files is compared with ``last_processed_keys``
    (typically held in a ``gr.State``). When they match, no-op updates are
    returned so the dropdowns are not rewritten for an unchanged file list.

    Every path returns exactly three values so the function can always be
    bound to the same three outputs. Previously the non-data-file path
    returned four values when ``last_processed_keys`` was supplied.

    Args:
        files: Uploaded files.
        is_data_file: Whether the upload was classified as a data file.
        last_processed_keys: Fingerprint stored by the previous call, or None
            if no fingerprint is being tracked.

    Returns:
        A 3-tuple ``(colnames_update, excel_sheets_update, keys)``:
            * unchanged files: two ``gr.update()`` no-ops and
              ``last_processed_keys``;
            * data files: the two values from ``put_columns_in_df(files)``
              and the new fingerprint;
            * otherwise: two hidden dropdowns and the new fingerprint.
    """
    keys = _data_files_fingerprint(files)

    # Same files as last time: leave the dropdowns untouched.
    if last_processed_keys is not None and keys == last_processed_keys:
        return gr.update(), gr.update(), last_processed_keys

    if is_data_file and files:
        colnames_dropdown, excel_sheets_dropdown = put_columns_in_df(files)
        return colnames_dropdown, excel_sheets_dropdown, keys

    # Not a data upload: hide both dropdowns. ``keys`` is already an empty
    # tuple when there are no files, so no extra fallback is needed.
    return gr.Dropdown(visible=False), gr.Dropdown(visible=False), keys
|
|
|
|
def handle_text_extract_method_selection(text_extract_method: str):
    """Decide which text-extraction option panels are visible in the walkthrough.

    A blank or missing selection is treated as the local OCR model option.

    Args:
        text_extract_method: Selected text extraction method.

    Returns:
        Tuple of two visibility updates: the first is visible only when the
        local OCR model option is selected; the second only when the AWS
        Textract option is selected and SHOW_AWS_TEXT_EXTRACTION_OPTIONS is
        truthy.
    """
    chosen = text_extract_method
    if isinstance(chosen, str):
        chosen = chosen.strip()
    if chosen is None or chosen == "":
        chosen = LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION

    local_ocr_visible = chosen == LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION
    textract_visible = (
        chosen == TEXTRACT_TEXT_EXTRACT_OPTION and SHOW_AWS_TEXT_EXTRACTION_OPTIONS
    )

    return gr.update(visible=local_ocr_visible), gr.update(visible=textract_visible)
|
|
|
|
def _is_llm_pii_method_shown_for_redaction(redaction_method, pii_method):
    """Decide whether the LLM PII controls should be visible.

    Blank inputs fall back to "Redact all PII" and
    DEFAULT_PII_DETECTION_MODEL. When a redaction mode is active but the PII
    method is the "no redaction" option, the default detection model is used
    for the visibility decision instead.

    Args:
        redaction_method: Selected redaction method label (may be None/blank).
        pii_method: Selected PII detection method (may be None/blank).

    Returns:
        Truthy only when the redaction method is "Redact all PII" or
        "Redact selected terms", SHOW_PII_IDENTIFICATION_OPTIONS is truthy,
        and the effective PII method is one of the three LLM options.
    """
    method = redaction_method.strip() if isinstance(redaction_method, str) else redaction_method
    if method is None or method == "":
        method = "Redact all PII"

    detector = pii_method.strip() if isinstance(pii_method, str) else pii_method
    if detector is None or detector == "":
        detector = DEFAULT_PII_DETECTION_MODEL

    redacting = method == "Redact all PII" or method == "Redact selected terms"

    # "No redaction" makes no sense while redacting; judge visibility as if
    # the default model were selected.
    if redacting and detector == NO_REDACTION_PII_OPTION:
        detector = DEFAULT_PII_DETECTION_MODEL

    pii_controls_shown = redacting and SHOW_PII_IDENTIFICATION_OPTIONS
    llm_options = (
        LOCAL_TRANSFORMERS_LLM_PII_OPTION,
        INFERENCE_SERVER_PII_OPTION,
        AWS_LLM_PII_OPTION,
    )
    return pii_controls_shown and detector in llm_options
|
|
|
|
def handle_redaction_method_selection(redaction_method: str, pii_method: str):
    """Compute Step 3 component updates for the selected redaction method.

    Blank inputs fall back to "Redact all PII" and DEFAULT_PII_DETECTION_MODEL.
    The three recognised methods are "Redact all PII", "Redact selected terms"
    and "Extract text only". Any other value now yields visibility-only
    updates; previously it raised UnboundLocalError because no branch assigned
    the update variables.

    Args:
        redaction_method: Selected redaction method label.
        pii_method: Selected PII detection method.

    Returns:
        A 7-tuple:
            0. update for the PII identification method dropdown (visibility,
               plus a value reset where applicable);
            1. local entities dropdown;
            2. AWS Comprehend entities dropdown;
            3. visibility update driven by the LLM-method flag;
            4. LLM entities dropdown;
            5-6. two visibility updates shown only for the two redaction
               methods.
    """
    # Normalise inputs; blank selections fall back to the defaults.
    if isinstance(redaction_method, str):
        redaction_method = redaction_method.strip()
    if redaction_method is None or redaction_method == "":
        redaction_method = "Redact all PII"
    if isinstance(pii_method, str):
        pii_method = pii_method.strip()
    if pii_method is None or pii_method == "":
        pii_method = DEFAULT_PII_DETECTION_MODEL

    is_redact_all_pii = redaction_method == "Redact all PII"
    is_redact_selected_terms = redaction_method == "Redact selected terms"
    is_redact_all_pii_or_selected_terms = is_redact_all_pii or is_redact_selected_terms
    # redaction_method is already stripped above, so a plain comparison suffices.
    is_extract_text_only = redaction_method == "Extract text only"

    # While redacting, a "no redaction" PII method is replaced by the default
    # model, both for visibility decisions and as the dropdown's new value.
    reset_pii_method = (
        is_redact_all_pii_or_selected_terms and pii_method == NO_REDACTION_PII_OPTION
    )
    pii_method_for_visibility = (
        DEFAULT_PII_DETECTION_MODEL if reset_pii_method else pii_method
    )
    pii_drop_value = DEFAULT_PII_DETECTION_MODEL if reset_pii_method else None

    show_pii_method = (
        is_redact_all_pii_or_selected_terms and SHOW_PII_IDENTIFICATION_OPTIONS
    )
    show_local_entities_init = show_pii_method and (
        pii_method_for_visibility == LOCAL_PII_OPTION
    )
    show_comprehend_entities_init = show_pii_method and (
        pii_method_for_visibility == AWS_PII_OPTION
    )
    is_llm_method_init = _is_llm_pii_method_shown_for_redaction(
        redaction_method, pii_method
    )

    def _configured_or_custom(configured):
        # Use the configured entity list when it is a list, else ["CUSTOM"].
        return configured if isinstance(configured, list) else ["CUSTOM"]

    if is_redact_all_pii_or_selected_terms:
        if is_redact_selected_terms:
            # Selected-terms mode only uses the custom entity.
            local_entities_val = ["CUSTOM"]
            comprehend_entities_val = ["CUSTOM"]
            llm_entities_val = ["CUSTOM"]
        else:
            local_entities_val = _configured_or_custom(CHOSEN_REDACT_ENTITIES)
            comprehend_entities_val = _configured_or_custom(CHOSEN_COMPREHEND_ENTITIES)
            llm_entities_val = _configured_or_custom(CHOSEN_LLM_ENTITIES)

        local_entities_update = gr.Dropdown(
            visible=show_local_entities_init, value=local_entities_val
        )
        comprehend_entities_update = gr.Dropdown(
            visible=show_comprehend_entities_init, value=comprehend_entities_val
        )
        llm_entities_update = gr.Dropdown(
            visible=is_llm_method_init, value=llm_entities_val
        )
        walkthrough_pii_identification_method_drop_update = (
            gr.update(visible=show_pii_method, value=pii_drop_value)
            if pii_drop_value is not None
            else gr.update(visible=show_pii_method)
        )
    else:
        # "Extract text only" or an unrecognised method: entity values are
        # left alone and only visibility is updated.
        local_entities_update = gr.Dropdown(visible=show_local_entities_init)
        comprehend_entities_update = gr.Dropdown(visible=show_comprehend_entities_init)
        llm_entities_update = gr.Dropdown(visible=is_llm_method_init)
        if is_extract_text_only:
            walkthrough_pii_identification_method_drop_update = gr.update(
                visible=show_pii_method, value=NO_REDACTION_PII_OPTION
            )
        else:
            # Unknown method: do not guess a PII method value.
            walkthrough_pii_identification_method_drop_update = gr.update(
                visible=show_pii_method
            )

    return (
        walkthrough_pii_identification_method_drop_update,
        local_entities_update,
        comprehend_entities_update,
        gr.update(visible=is_llm_method_init),
        llm_entities_update,
        gr.update(visible=is_redact_all_pii_or_selected_terms),
        gr.update(visible=is_redact_all_pii_or_selected_terms),
    )
|
|
|
|
| |
def handle_main_redaction_method_selection(redaction_method, pii_method):
    """Adapt handle_redaction_method_selection's output for the main app components.

    The inner handler returns seven updates. This wrapper drops the inner
    item at index 3 (the LLM visibility update), reorders the rest, and
    appends updates computed here.

    The redaction method is normalised the same way as in the inner handler:
    it is stripped, and None/blank becomes "Redact all PII". This keeps the
    flags computed here in agreement with the inner handler's updates for
    missing or whitespace-padded selections.

    Args:
        redaction_method: Selected redaction method label.
        pii_method: Selected PII detection method.

    Returns:
        A list of ten updates, in order:
            0-2. inner items 0, 1, 2 (PII method dropdown, local entities,
                 Comprehend entities);
            3.   inner item 4 (LLM entities dropdown);
            4.   visibility update for the custom LLM instructions block;
            5-6. inner items 5 and 6;
            7.   visibility update following the "show PII method" rule;
            8.   visibility update that is visible only for
                 "Redact selected terms";
            9.   value update set to True only for "Extract text only".
    """
    raw = list(handle_redaction_method_selection(redaction_method, pii_method))

    # Mirror the inner handler's normalisation so both sides agree.
    method = (
        redaction_method.strip()
        if isinstance(redaction_method, str)
        else redaction_method
    )
    if method is None or method == "":
        method = "Redact all PII"

    is_redact_all_pii = method == "Redact all PII"
    is_redact_selected_terms = method == "Redact selected terms"
    is_extract_text_only = method == "Extract text only"

    show_pii_method = (
        is_redact_all_pii or is_redact_selected_terms
    ) and SHOW_PII_IDENTIFICATION_OPTIONS
    show_selected_terms_lists = is_redact_selected_terms
    is_llm_method_for_custom_instructions = _is_llm_pii_method_shown_for_redaction(
        redaction_method, pii_method
    )

    results = [
        raw[0],
        raw[1],
        raw[2],
        raw[4],  # LLM entities dropdown; raw[3] is intentionally not forwarded
        gr.update(visible=is_llm_method_for_custom_instructions),
        raw[5],
        raw[6],
        gr.update(visible=show_pii_method),
        gr.update(visible=show_selected_terms_lists),
        gr.update(value=is_extract_text_only),
    ]
    return results
|
|
|
|
def handle_pii_method_selection(pii_method: str):
    """Choose which entity controls are visible for the selected PII method.

    Args:
        pii_method: Selected PII detection method. None or a blank string is
            treated as "show the local entities only".

    Returns:
        Tuple of three visibility updates: local entities (LOCAL_PII_OPTION),
        Comprehend entities (AWS_PII_OPTION), and the LLM controls (any of
        the three LLM options).
    """
    nothing_chosen = pii_method is None or (
        isinstance(pii_method, str) and not pii_method.strip()
    )

    if nothing_chosen:
        visibility = (True, False, False)
    else:
        llm_options = (
            LOCAL_TRANSFORMERS_LLM_PII_OPTION,
            INFERENCE_SERVER_PII_OPTION,
            AWS_LLM_PII_OPTION,
        )
        visibility = (
            pii_method == LOCAL_PII_OPTION,
            pii_method == AWS_PII_OPTION,
            pii_method in llm_options,
        )

    return tuple(gr.update(visible=flag) for flag in visibility)
|
|
|
|
def handle_pii_method_selection_tabular(pii_method: str):
    """Choose which entity controls are visible for the tabular PII method.

    Only visibility updates are returned; no component values are changed.
    (The original author notes this avoids loading spinners hanging on nested
    LLM components when switching to an LLM method.)

    Args:
        pii_method: Selected tabular PII detection method. None or a blank
            string shows the local entities only.

    Returns:
        Tuple of three visibility updates: local entities, Comprehend
        entities, and the LLM block.
    """
    is_blank = isinstance(pii_method, str) and not pii_method.strip()
    if pii_method is None or is_blank:
        # Default view: local entities only.
        return (
            gr.update(visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    local_visible = pii_method == LOCAL_PII_OPTION
    comprehend_visible = pii_method == AWS_PII_OPTION
    llm_visible = (
        pii_method == LOCAL_TRANSFORMERS_LLM_PII_OPTION
        or pii_method == INFERENCE_SERVER_PII_OPTION
        or pii_method == AWS_LLM_PII_OPTION
    )
    return (
        gr.update(visible=local_visible),
        gr.update(visible=comprehend_visible),
        gr.update(visible=llm_visible),
    )
|
|
|
|
def handle_step_3_next(
    text_extract_method_val,
    local_ocr_method_val,
    handwrite_signature_val,
    pii_method_val,
    redact_entities_val,
    redact_comprehend_entities_val,
    redact_llm_entities_val,
    custom_llm_instructions_val,
    deny_list_val,
    allow_list_val,
    fully_redacted_list_val,
    pii_method_tabular_val,
    anon_strategy_val,
    do_initial_clean_val,
    redact_duplicate_pages_val,
    max_fuzzy_spelling_mistakes_num_val,
):
    """Copy the walkthrough's step-3 selections onto the main components.

    Each input becomes a component update carrying that value. When an input
    is missing, a bare component (no value) is returned instead. The first
    four inputs are "missing" when falsy; the remaining scalar inputs are
    "missing" only when None. The three list inputs may be a DataFrame (first
    column is used), a list (falsy items dropped, the rest stringified), or
    another value passed through as-is.

    Returns:
        A 17-tuple of updates in parameter order, except that
        ``gr.Walkthrough(selected=4)`` sits second to last, directly before
        the fuzzy-spelling-mistakes number update.
    """

    def _if_truthy(component_cls, value):
        # Falsy values (None, "", empty list) yield a component with no value.
        return component_cls(value=value) if value else component_cls()

    def _if_not_none(component_cls, value):
        # Only None counts as "missing"; False/0/"" are forwarded.
        return component_cls(value=value) if value is not None else component_cls()

    def _list_dropdown(entries):
        # Normalise an allow/deny-style list into a Dropdown update.
        if entries is None:
            return gr.Dropdown()
        if isinstance(entries, pd.DataFrame):
            entries = [] if entries.empty else entries.iloc[:, 0].tolist()
        if isinstance(entries, list):
            entries = [str(item) for item in entries if item]
        return gr.Dropdown(value=entries) if entries else gr.Dropdown()

    # Text extraction settings.
    text_extract_method_update = _if_truthy(gr.Radio, text_extract_method_val)
    local_ocr_update = _if_truthy(gr.Radio, local_ocr_method_val)
    handwrite_signature_update = _if_truthy(gr.CheckboxGroup, handwrite_signature_val)

    # PII detection settings.
    pii_method_update = _if_truthy(gr.Radio, pii_method_val)
    redact_entities_update = _if_not_none(gr.Dropdown, redact_entities_val)
    redact_comprehend_entities_update = _if_not_none(
        gr.Dropdown, redact_comprehend_entities_val
    )
    redact_llm_entities_update = _if_not_none(gr.Dropdown, redact_llm_entities_val)
    custom_llm_instructions_update = _if_not_none(
        gr.Textbox, custom_llm_instructions_val
    )

    # Deny / allow / fully-redacted lists.
    deny_list_update = _list_dropdown(deny_list_val)
    allow_list_update = _list_dropdown(allow_list_val)
    fully_redacted_list_update = _list_dropdown(fully_redacted_list_val)

    # Tabular settings.
    pii_method_tabular_update = _if_not_none(gr.Radio, pii_method_tabular_val)
    anon_strategy_update = _if_not_none(gr.Radio, anon_strategy_val)
    do_initial_clean_update = _if_not_none(gr.Checkbox, do_initial_clean_val)

    # Remaining document settings.
    redact_duplicate_pages_update = _if_not_none(
        gr.Checkbox, redact_duplicate_pages_val
    )
    max_fuzzy_spelling_mistakes_num_update = _if_not_none(
        gr.Number, max_fuzzy_spelling_mistakes_num_val
    )

    return (
        text_extract_method_update,
        local_ocr_update,
        handwrite_signature_update,
        pii_method_update,
        redact_entities_update,
        redact_comprehend_entities_update,
        redact_llm_entities_update,
        custom_llm_instructions_update,
        deny_list_update,
        allow_list_update,
        fully_redacted_list_update,
        pii_method_tabular_update,
        anon_strategy_update,
        do_initial_clean_update,
        redact_duplicate_pages_update,
        gr.Walkthrough(selected=4),
        max_fuzzy_spelling_mistakes_num_update,
    )
|
|
|
|
def handle_step_4_next(
    page_min_val,
    page_max_val,
    textract_output_found_val,
    relevant_ocr_output_with_words_found_val,
    total_pdf_page_count_val,
    estimated_aws_costs_val,
    estimated_time_taken_val,
    cost_code_dataframe_val,
    cost_code_choice_val,
):
    """Copy the walkthrough's step-4 values onto the main components.

    Each non-None input becomes a component update carrying that value; a
    None input becomes a bare component of the same type with no value.

    Returns:
        A 10-tuple: nine updates in parameter order (Number, Number, Checkbox,
        Checkbox, Number, Number, Number, Dataframe, Dropdown) followed by
        ``gr.Walkthrough(selected=5)``.
    """
    # (component class, incoming value) pairs in output order.
    pairs = (
        (gr.Number, page_min_val),
        (gr.Number, page_max_val),
        (gr.Checkbox, textract_output_found_val),
        (gr.Checkbox, relevant_ocr_output_with_words_found_val),
        (gr.Number, total_pdf_page_count_val),
        (gr.Number, estimated_aws_costs_val),
        (gr.Number, estimated_time_taken_val),
        (gr.Dataframe, cost_code_dataframe_val),
        (gr.Dropdown, cost_code_choice_val),
    )

    updates = []
    for component_cls, incoming in pairs:
        if incoming is None:
            updates.append(component_cls())
        else:
            updates.append(component_cls(value=incoming))

    return (*updates, gr.Walkthrough(selected=5))
|
|
|
|
def sync_walkthrough_outputs_to_original(summary_text, output_file_value):
    """Duplicate the redaction outputs for two sets of output components.

    Args:
        summary_text: The output summary text.
        output_file_value: The output file value.

    Returns:
        The 4-tuple (summary_text, output_file_value, summary_text,
        output_file_value): the same pair repeated twice, once for the
        walkthrough outputs and once for the original outputs.
    """
    outputs = (summary_text, output_file_value)
    # Repeating the tuple reuses the same objects in both halves.
    return outputs * 2
|
|
|
|
def sync_walkthrough_tabular_outputs_to_original(summary_text, output_file_value):
    """Duplicate the tabular redaction outputs for two sets of output components.

    Args:
        summary_text: The output summary text.
        output_file_value: The output file value.

    Returns:
        Tuple of (walkthrough_summary, walkthrough_file, original_summary,
        original_file), where both summaries are ``summary_text`` and both
        files are ``output_file_value``.
    """
    walkthrough_summary = original_summary = summary_text
    walkthrough_file = original_file = output_file_value
    return walkthrough_summary, walkthrough_file, original_summary, original_file
|
|
|
|
def update_step_3_tabular_visibility(is_data_file):
    """Toggle Step 3 components between document mode and tabular mode.

    Args:
        is_data_file: Whether the uploaded file is a data file.

    Returns:
        Tuple of seven visibility updates: the first four use
        ``visible=not is_data_file`` (document-only components) and the last
        three use ``visible=is_data_file`` (tabular components).
    """
    document_visible = not is_data_file
    document_updates = [gr.update(visible=document_visible) for _ in range(4)]
    # The tabular updates receive the flag exactly as passed in.
    tabular_updates = [gr.update(visible=is_data_file) for _ in range(3)]
    return (*document_updates, *tabular_updates)
|
|
|
|
def update_step_4_visibility(is_data_file):
    """Toggle Step 4 between its document and tabular components.

    Args:
        is_data_file: Whether the uploaded file is a data file.

    Returns:
        Tuple of two visibility updates: the first is visible when the upload
        is not a data file, the second when it is.
    """
    document_update = gr.update(visible=not is_data_file)
    tabular_update = gr.update(visible=is_data_file)
    return document_update, tabular_update
|
|
|
|
def handle_main_text_extract_method_selection(text_extract_method: str):
    """Decide which text-extraction option panels are visible in the main app.

    A blank or missing selection is treated as the local OCR model option.

    Args:
        text_extract_method: Selected text extraction method.

    Returns:
        Tuple of three visibility updates, in order:
            * local OCR options: visible when the local OCR model option is
              selected;
            * inference server options: visible when the local OCR model
              option is selected and SHOW_INFERENCE_SERVER_VLM_MODEL_OPTIONS
              is truthy;
            * AWS Textract options: visible when the Textract option is
              selected and SHOW_AWS_TEXT_EXTRACTION_OPTIONS is truthy.
    """
    chosen = text_extract_method
    if isinstance(chosen, str):
        chosen = chosen.strip()
    if chosen is None or chosen == "":
        chosen = LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION

    local_ocr_visible = chosen == LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION
    textract_visible = (
        chosen == TEXTRACT_TEXT_EXTRACT_OPTION and SHOW_AWS_TEXT_EXTRACTION_OPTIONS
    )
    inference_server_visible = (
        chosen == LOCAL_OCR_MODEL_TEXT_EXTRACT_OPTION
        and SHOW_INFERENCE_SERVER_VLM_MODEL_OPTIONS
    )

    return (
        gr.update(visible=local_ocr_visible),
        gr.update(visible=inference_server_visible),
        gr.update(visible=textract_visible),
    )
|
|
|
|
def handle_main_pii_method_selection(pii_method):
    """Choose which main-app entity controls are visible for the PII method.

    Args:
        pii_method: Selected PII detection method. String values are stripped
            before comparison.

    Returns:
        Tuple of four visibility updates, in order: local entities,
        Comprehend entities, and two updates that are both driven by the
        "is an LLM method" flag.
            * None/blank selection: only the first is visible.
            * NO_REDACTION_PII_OPTION: all four are hidden.
            * Otherwise: each follows its matching option constant.
    """
    selected = pii_method.strip() if isinstance(pii_method, str) else pii_method

    if selected is None or selected == "":
        # Nothing chosen yet: fall back to showing the local entities.
        visibility = (True, False, False, False)
    elif selected == NO_REDACTION_PII_OPTION:
        visibility = (False, False, False, False)
    else:
        uses_llm = (
            selected == LOCAL_TRANSFORMERS_LLM_PII_OPTION
            or selected == INFERENCE_SERVER_PII_OPTION
            or selected == AWS_LLM_PII_OPTION
        )
        visibility = (
            selected == LOCAL_PII_OPTION,
            selected == AWS_PII_OPTION,
            uses_llm,
            uses_llm,
        )

    return tuple(gr.update(visible=flag) for flag in visibility)
|
|