Spaces:

tachiwin
/

document-ocr

Running

App Files Files Community

Luis J Camargo committed on 1 day ago

Commit

4bdfa9b

1 Parent(s): 2ea14b2

refactor: Streamline PaddleOCR-VL pipeline setup using `paddlex` CLI for config generation and add new UI reference and existing OCR app files.

Browse files

Files changed (1) hide show

app.py +96 -162

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import logging
 import sys
 import yaml
 import traceback
 from typing import Dict, List, Tuple, Any, Optional
 import time
@@ -21,7 +22,10 @@ logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging
 logger = logging.getLogger("TachiwinDocOCR")
 CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
-CONFIG_FILE = "custom_pipeline_config.yaml"
 OUTPUT_DIR = "output"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
@@ -35,23 +39,16 @@ LATEX_DELIMS = [
 # --- Paddle imports and Diagnostic ---
 PADDLE_AVAILABLE = False
-PADDLEX_VERSION = "Unknown"
-PADDLEOCR_VERSION = "Unknown"
 try:
     import paddle
     import paddlex
-    from paddlex import create_pipeline
     from paddleocr import PaddleOCRVL
     PADDLE_AVAILABLE = True
-    PADDLEX_VERSION = getattr(paddlex, "__version__", "Unknown")
-    logger.info(f"Paddle libraries loaded. PaddleX version: {PADDLEX_VERSION}")
 except ImportError as e:
     logger.error(f"Import Error: {e}")
-    logger.error(traceback.format_exc())
 except Exception as e:
     logger.error(f"Unexpected error during import: {e}")
-    logger.error(traceback.format_exc())
 # --- Model Initialization ---
 pipeline = None
@@ -65,85 +62,75 @@ def setup_pipeline():
     try:
         logger.info("Starting setup_pipeline...")
-        # 1. Generate default config via CLI-like method to avoid early model download
-        # We'll use create_pipeline and then export_pipeline_config, but we need to be careful
-        # as create_pipeline might download the model immediately.
-        # If the file exists, we'll read it. If not, we'll try to create a minimal one or use paddlex CLI.
-        if not os.path.exists(CONFIG_FILE):
-            logger.info(f"Generating default configuration for PaddleOCR-VL...")
-            # Ideally: paddlex --get_pipeline_config PaddleOCR-VL
-            # We can try to get it from paddlex registry if documented
             try:
-                from paddlex.inference.pipelines import pipeline_registry
-                # This is internal, but let's try to find if we can get the default dict
-                logger.info(f"Registered pipelines: {list(pipeline_registry.keys())[:5]}...")
-            except:
-                pass
-            # Fallback: Create a temporary pipeline to export config
-            logger.info("Initializing a temporary pipeline to export default configuration...")
-            temp_pipeline = create_pipeline("PaddleOCR-VL")
-            temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
-            logger.info(f"Default configuration exported to {CONFIG_FILE}")
         # 2. Load and Modify Config
-        logger.info(f"Loading configuration from {CONFIG_FILE}")
-        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
             config_data = yaml.safe_load(f)
         logger.info("Modifying configuration with custom model path...")
-        # Rigorous path search and modification
-        modified = False
-        # Check standard PaddleX structure
         if 'SubModules' in config_data:
-            for sub_name, sub_cfg in config_data['SubModules'].items():
-                if sub_name == 'VLRecognition':
-                    old_path = sub_cfg.get('model_dir')
-                    sub_cfg['model_dir'] = CUSTOM_MODEL_PATH
-                    logger.info(f"Success: Updated SubModules.VLRecognition.model_dir from '{old_path}' to '{CUSTOM_MODEL_PATH}'")
-                    modified = True
-        if not modified:
-            logger.warning("Standard SubModules.VLRecognition path not found. performing deep search...")
             def deep_update(d):
                 count = 0
                 for k, v in d.items():
                     if k == 'VLRecognition' and isinstance(v, dict):
-                        old = v.get('model_dir')
                         v['model_dir'] = CUSTOM_MODEL_PATH
-                        logger.info(f"Deep search found VLRecognition. Updated model_dir from '{old}' to '{CUSTOM_MODEL_PATH}'")
                         count += 1
                     elif isinstance(v, dict):
                         count += deep_update(v)
                 return count
-            if deep_update(config_data) > 0:
-                modified = True
-        # Save modified config
-        with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
             yaml.dump(config_data, f, default_flow_style=False)
-        # 3. Log the final YAML to console as requested
-        logger.info("--- FINAL YAML CONFIGURATION ---")
-        yaml_str = yaml.dump(config_data, default_flow_style=False)
-        print(yaml_str)
-        logger.info("--- END FINAL YAML CONFIGURATION ---")
-        # 4. Initialize pipeline with modified config
-        logger.info(f"Initializing PaddleOCRVL with custom config file: {CONFIG_FILE}")
-        # Note: We use PaddleOCRVL(pipeline_config=CONFIG_FILE) as per our research
-        # If that fails, we can try create_pipeline(CONFIG_FILE)
-        try:
-            pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
-            logger.info("Success: PaddleOCRVL initialized with custom config.")
-        except Exception as e:
-            logger.warning(f"PaddleOCRVL(pipeline_config=...) failed: {e}. Trying create_pipeline(path_to_yaml)...")
-            pipeline = create_pipeline(CONFIG_FILE)
-            logger.info("Success: Pipeline initialized using create_pipeline(CONFIG_FILE).")
     except Exception as e:
         logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
@@ -152,8 +139,6 @@ def setup_pipeline():
 # Initial setup
 if PADDLE_AVAILABLE:
     setup_pipeline()
-else:
-    logger.error("Inference backend disabled: Paddle libraries not found.")
 # --- Helper Functions ---
@@ -208,29 +193,15 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
 # --- Inference Logic ---
 def run_inference(img_path, task_type="ocr"):
-    status_msg = ""
-    if not PADDLE_AVAILABLE:
-        status_msg = "❌ Paddle libraries not installed."
-        logger.error(status_msg)
-        return status_msg, "", "", ""
-    if pipeline is None:
-        status_msg = "❌ Pipeline failed to initialize. Check logs for details."
-        logger.error(status_msg)
-        return status_msg, "", "", ""
     if not img_path:
-        return "⚠️ Please upload an image first.", "", "", ""
     try:
         logger.info(f"--- Inference Start: {task_type} ---")
-        logger.info(f"Image: {img_path}")
-        start_time = time.time()
         output = pipeline.predict(img_path)
-        end_time = time.time()
-        logger.info(f"Inference completed in {end_time - start_time:.2f} seconds.")
         md_content = ""
         json_content = ""
@@ -241,47 +212,41 @@ def run_inference(img_path, task_type="ocr"):
         os.makedirs(run_output_dir, exist_ok=True)
         for i, res in enumerate(output):
-            logger.info(f"Processing output segment {i+1}...")
             # Save results
             res.save_to_json(save_path=run_output_dir)
             res.save_to_markdown(save_path=run_output_dir)
-            # Print to stdout
             res.print()
-            # Read files back for Gradio
-            files_found = os.listdir(run_output_dir)
-            logger.info(f"Generated files: {files_found}")
-            for file in files_found:
-                fpath = os.path.join(run_output_dir, file)
-                if file.endswith(".md"):
                     with open(fpath, 'r', encoding='utf-8') as f:
                         md_content += f.read() + "\n\n"
-                elif file.endswith(".json"):
                     with open(fpath, 'r', encoding='utf-8') as f:
                         json_content += f.read() + "\n\n"
-                elif file.endswith((".png", ".jpg", ".jpeg")) and ("res" in file or "vis" in file):
                     vis_src = image_to_base64_data_url(fpath)
-                    vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background: white;">'
-                    vis_html += f'<div style="background: #10b981; color: white; padding: 5px 15px; font-weight: bold;">Visualization {i+1}</div>'
                     vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
                     vis_html += f'</div>'
         if not md_content:
-            md_content = "⚠️ OCR finished but no text was extracted."
         md_preview = _escape_inequalities_in_math(md_content)
-        logger.info("--- Inference Finished Successfully ---")
         return md_preview, md_content, vis_html, json_content
     except Exception as e:
-        err_detail = traceback.format_exc()
         logger.error(f"Inference Error: {e}")
-        logger.error(err_detail)
-        return f"❌ Error: {str(e)}\n\nCheck logs for more details.", "", "", ""
 # --- UI Components ---
 custom_css = """
 body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
@@ -292,84 +257,64 @@ body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
     color: white;
     border-radius: 1.5rem;
     margin-bottom: 2rem;
-    box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
 }
-.app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; margin-bottom: 0.5rem; }
-.app-header p { font-size: 1.25rem; opacity: 0.95; }
 .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
-.quick-links { display: flex; justify-content: center; gap: 1.5rem; margin-bottom: 2rem; font-weight: 600; }
-.quick-links a { color: #0284c7; text-decoration: none; transition: color 0.2s; }
-.quick-links a:hover { color: #0369a1; text-decoration: underline; }
-.output-box { border-radius: 1rem !important; border: 1px solid #e2e8f0 !important; }
-.status-indicator { font-family: monospace; font-size: 0.875rem; color: #64748b; margin-top: 0.5rem; }
 """
 with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
-    # Diagnostic Info
-    gr.HTML(f"""<div style="display:none">Paddle Status: {PADDLE_AVAILABLE}, X: {PADDLEX_VERSION}</div>""")
-    # Branding Header
     gr.HTML(
         """
         <div class="app-header">
             <h1>🌎 Tachiwin Document Parsing OCR 🦡</h1>
-            <p>Empowering the Indigenous Languages of Mexico through State-of-the-Art OCR</p>
         </div>
         """
     )
     with gr.Row(elem_classes=["notice"]):
-        gr.Markdown(f"""
-        **🚀 Engine Status:** Using **PaddleOCRVL 1.5** with custom weights: `{CUSTOM_MODEL_PATH}`.
-        Supported Languages: 68 Official Mexican Indigenous Languages.
-        """)
-    with gr.Row(elem_classes=["quick-links"]):
-        gr.HTML('<a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">💻 GitHub</a>')
-        gr.HTML('<a href="https://huggingface.co/tachiwin/PaddleOCR-VL-Tachiwin-BF16" target="_blank">🤗 Model Repo</a>')
-        gr.HTML('<a href="https://www.paddleocr.com" target="_blank">📚 Documentation</a>')
     with gr.Tabs():
-        # --- Tab 1: Document Parsing ---
         with gr.Tab("📄 Full Document Parsing"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_doc = gr.File(label="Upload Image", file_count="single", type="filepath", file_types=["image"])
-                    preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
-                    with gr.Row(variant="panel"):
-                        btn_parse = gr.Button("🔍 Start Parsing", variant="primary", scale=2)
-                        with gr.Column(scale=1):
-                            chart_switch = gr.Checkbox(label="Chart OCR", value=True)
-                            unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("📝 Markdown View"):
                             md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
                         with gr.Tab("🖼️ Visual Results"):
-                            vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Upload and parse to see visual results.</div>')
-                        with gr.Tab("📜 Markdown Source"):
                             md_raw_doc = gr.Code(language="markdown")
             file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
             def parse_doc_wrapper(fp, ch, uw):
-                return run_inference(fp, task_type="Document Parsing")[:3] # Returns Preview, Vis, Raw
             btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
-        # --- Tab 2: Element Recognition ---
         with gr.Tab("🧩 Specific Recognition"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_vl = gr.File(label="Upload Element", file_count="single", type="filepath", file_types=["image"])
-                    preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
                     with gr.Row():
                         btn_ocr = gr.Button("Text OCR", variant="secondary")
-                        btn_formula = gr.Button("Math Formula", variant="secondary")
-                    with gr.Row():
-                        btn_table = gr.Button("Table Data", variant="secondary")
-                        btn_chart = gr.Button("Chart Data", variant="secondary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
@@ -384,21 +329,21 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
                 res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
                 return res_preview, res_raw
-            for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table"), (btn_chart, "Chart")]:
                 btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
-        # --- Tab 3: Spotting ---
         with gr.Tab("📍 Feature Spotting"):
             with gr.Row():
                 with gr.Column(scale=5):
-                    file_spot = gr.File(label="Target Image", file_count="single", type="filepath", file_types=["image"])
-                    preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
                     btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("🖼️ Detection"):
-                            vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes will appear here.</div>')
                         with gr.Tab("💾 JSON Feed"):
                             json_spot = gr.Code(label="JSON", language="json")
@@ -410,18 +355,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
             btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
-    # Footer
-    gr.Markdown(
-        """
-        ---
-        ### 🌎 Tachiwin Project 🦡
-        Dedicated to bridging the digital divide for the 68 officially recognized indigenous languages of Mexico.
-        **Supported Families:** Uto-Aztecan, Mayan, Oto-Manguean, Totonac-Tepehua, Mixe-Zoque, and more.
-        *Linguistic rights are human rights.*
-        """
-    )
 if __name__ == "__main__":
     demo.queue().launch()

 import sys
 import yaml
 import traceback
+import subprocess
 from typing import Dict, List, Tuple, Any, Optional
 import time
 logger = logging.getLogger("TachiwinDocOCR")
 CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
+# The CLI generated filename is usually {pipeline_name}.yaml
+INTERNAL_CONFIG_FILE = "PaddleOCR-VL.yaml"
+# Our final working file
+FINAL_CONFIG_FILE = "custom_pipeline_config.yaml"
 OUTPUT_DIR = "output"
 os.makedirs(OUTPUT_DIR, exist_ok=True)
 # --- Paddle imports and Diagnostic ---
 PADDLE_AVAILABLE = False
 try:
     import paddle
     import paddlex
     from paddleocr import PaddleOCRVL
     PADDLE_AVAILABLE = True
+    logger.info(f"Paddle libraries loaded. PaddleX version: {getattr(paddlex, '__version__', 'Unknown')}")
 except ImportError as e:
     logger.error(f"Import Error: {e}")
 except Exception as e:
     logger.error(f"Unexpected error during import: {e}")
 # --- Model Initialization ---
 pipeline = None
     try:
         logger.info("Starting setup_pipeline...")
+        # 1. Generate default config via CLI
+        if not os.path.exists(FINAL_CONFIG_FILE):
+            logger.info("Generating default configuration via paddlex CLI...")
+            # Command: paddlex --get_pipeline_config PaddleOCR-VL --save_path ./
             try:
+                result = subprocess.run(
+                    ["paddlex", "--get_pipeline_config", "PaddleOCR-VL", "--save_path", "./"],
+                    capture_output=True, text=True, check=True
+                )
+                logger.info(f"CLI Output: {result.stdout}")
+            except subprocess.CalledProcessError as e:
+                logger.error(f"CLI Error: {e.stderr}")
+                # If CLI fails, we can't proceed with custom model easily without a template
+                raise e
+            # The file generated is likely PaddleOCR-VL.yaml
+            if os.path.exists(INTERNAL_CONFIG_FILE):
+                os.rename(INTERNAL_CONFIG_FILE, FINAL_CONFIG_FILE)
+                logger.info(f"Renamed {INTERNAL_CONFIG_FILE} to {FINAL_CONFIG_FILE}")
+            else:
+                logger.error(f"Expected config file {INTERNAL_CONFIG_FILE} was not found after CLI execution.")
+                # List files to see what was created
+                logger.info(f"Current directory files: {os.listdir('.')}")
+                raise FileNotFoundError(f"Config file {INTERNAL_CONFIG_FILE} not found.")
         # 2. Load and Modify Config
+        logger.info(f"Loading configuration from {FINAL_CONFIG_FILE}")
+        with open(FINAL_CONFIG_FILE, 'r', encoding='utf-8') as f:
             config_data = yaml.safe_load(f)
         logger.info("Modifying configuration with custom model path...")
+        # Search and update VLRecognition model_dir
+        updated = False
         if 'SubModules' in config_data:
+            if 'VLRecognition' in config_data['SubModules']:
+                config_data['SubModules']['VLRecognition']['model_dir'] = CUSTOM_MODEL_PATH
+                updated = True
+        if not updated:
+            # Deep search fallback
             def deep_update(d):
                 """Set model_dir on every nested 'VLRecognition' mapping.
                 Recursively walks the dict ``d``; each dict stored under the key
                 'VLRecognition' gets its 'model_dir' set to CUSTOM_MODEL_PATH and is
                 not descended into further. Returns how many mappings were updated.
                 """
                 hits = 0
                 for key, value in d.items():
                     # Only dict values can be a target or contain one.
                     if not isinstance(value, dict):
                         continue
                     if key == 'VLRecognition':
                         value['model_dir'] = CUSTOM_MODEL_PATH
                         hits += 1
                     else:
                         hits += deep_update(value)
                 return hits
+            updated = deep_update(config_data) > 0
+        if updated:
+            logger.info(f"Successfully updated VLRecognition model_dir to {CUSTOM_MODEL_PATH}")
+        else:
+            logger.warning("Could not find VLRecognition sub-module in the configuration to update its path.")
+        with open(FINAL_CONFIG_FILE, 'w', encoding='utf-8') as f:
             yaml.dump(config_data, f, default_flow_style=False)
+        # Log final YAML for verification
+        logger.info("--- UPDATED YAML CONFIG ---")
+        print(yaml.dump(config_data, default_flow_style=False))
+        logger.info("--- END UPDATED YAML ---")
+        # 3. Initialize pipeline
+        logger.info(f"Initializing PaddleOCRVL with config: {FINAL_CONFIG_FILE}")
+        pipeline = PaddleOCRVL(pipeline_config=FINAL_CONFIG_FILE)
+        logger.info("PaddleOCRVL initialized successfully.")
     except Exception as e:
         logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
 # Initial setup
 if PADDLE_AVAILABLE:
     setup_pipeline()
 # --- Helper Functions ---
 # --- Inference Logic ---
 # Run the global `pipeline` on one image path and collect its outputs.
 #
 # Returns a 4-tuple (md_preview, md_content, vis_html, json_content). When the
 # backend is unavailable, no image is given, or an exception is raised, the
 # first element carries a user-facing message and the other three are "".
 # In the lines visible here `task_type` is only used in the start log message.
 # NOTE(review): `vis_html` and `run_output_dir` are not assigned in the visible
 # lines; presumably they are set in lines elided from this view -- confirm.
 def run_inference(img_path, task_type="ocr"):
+    if not PADDLE_AVAILABLE or pipeline is None:
+        return "❌ Paddle backend not available. Check initialization logs.", "", "", ""
     if not img_path:
+        return "⚠️ Please upload an image.", "", "", ""
     try:
         logger.info(f"--- Inference Start: {task_type} ---")
         output = pipeline.predict(img_path)
         md_content = ""
         json_content = ""
         os.makedirs(run_output_dir, exist_ok=True)
         for i, res in enumerate(output):
             # Save results
             res.save_to_json(save_path=run_output_dir)
             res.save_to_markdown(save_path=run_output_dir)
             res.print()
+            # Read back generated files
             # NOTE(review): this listing runs once per result and covers the whole
             # directory, so files written for earlier results are appended again
             # when `output` yields more than one item -- confirm whether intended.
+            fnames = os.listdir(run_output_dir)
+            for fname in fnames:
+                fpath = os.path.join(run_output_dir, fname)
+                if fname.endswith(".md"):
                     with open(fpath, 'r', encoding='utf-8') as f:
                         md_content += f.read() + "\n\n"
+                elif fname.endswith(".json"):
                     with open(fpath, 'r', encoding='utf-8') as f:
                         json_content += f.read() + "\n\n"
                 # Only image files whose name contains "res" are embedded as visuals.
+                elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
                     vis_src = image_to_base64_data_url(fpath)
+                    vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden;">'
                     vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
                     vis_html += f'</div>'
         # Fall back to a placeholder message when no markdown was read back.
         if not md_content:
+            md_content = "⚠️ Finished but no content was recognized."
         md_preview = _escape_inequalities_in_math(md_content)
+        logger.info("--- Inference Finished ---")
         return md_preview, md_content, vis_html, json_content
     except Exception as e:
         # Log the message and full traceback, then return the error text in the
         # first tuple slot instead of raising.
         logger.error(f"Inference Error: {e}")
+        logger.error(traceback.format_exc())
+        return f"❌ Error: {str(e)}", "", "", ""
 # --- UI Components ---
+# (Keeping previous UI logic)
 custom_css = """
 body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
     color: white;
     border-radius: 1.5rem;
     margin-bottom: 2rem;
 }
+.app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; }
 .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
+.output-box { border: 1px solid #e2e8f0 !important; border-radius: 1rem !important; }
 """
 with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
     gr.HTML(
         """
         <div class="app-header">
             <h1>🌎 Tachiwin Document Parsing OCR 🦡</h1>
+            <p>Fine-tuned for the 68 Indigenous Languages of Mexico</p>
         </div>
         """
     )
     with gr.Row(elem_classes=["notice"]):
+        gr.Markdown(f"**Engine:** PaddleOCRVL 1.5 | **Model:** `{CUSTOM_MODEL_PATH}`")
     with gr.Tabs():
+        # Document Parsing Tab
         with gr.Tab("📄 Full Document Parsing"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_doc = gr.File(label="Upload Image", type="filepath")
+                    preview_doc_html = gr.HTML(visible=False)
+                    btn_parse = gr.Button("🔍 Start Parsing", variant="primary")
+                    with gr.Row():
+                        chart_switch = gr.Checkbox(label="Chart OCR", value=True)
+                        unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("📝 Markdown View"):
                             md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
                         with gr.Tab("🖼️ Visual Results"):
+                            vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Waiting for results...</div>')
+                        with gr.Tab("📜 Raw Source"):
                             md_raw_doc = gr.Code(language="markdown")
             file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
             # Click handler for the parse button: runs inference on the uploaded file
             # path `fp` and returns (markdown preview, visualization HTML, raw markdown).
             # `ch` and `uw` are accepted from the checkboxes but are not used here.
             def parse_doc_wrapper(fp, ch, uw):
+                res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
                 # Reordered so the visualization comes second and the raw markdown
                 # last; the JSON result is dropped.
+                return res_preview, res_vis, res_raw
             btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
+        # Element Recognition Tab
         with gr.Tab("🧩 Specific Recognition"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_vl = gr.File(label="Upload Element", type="filepath")
+                    preview_vl_html = gr.HTML(visible=False)
                     with gr.Row():
                         btn_ocr = gr.Button("Text OCR", variant="secondary")
+                        btn_formula = gr.Button("Formula", variant="secondary")
+                        btn_table = gr.Button("Table", variant="secondary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
                 res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
                 return res_preview, res_raw
+            for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
                 btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
+        # Spotting Tab
         with gr.Tab("📍 Feature Spotting"):
             with gr.Row():
                 with gr.Column(scale=5):
+                    file_spot = gr.File(label="Target Image", type="filepath")
+                    preview_spot_html = gr.HTML(visible=False)
                     btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
                 with gr.Column(scale=7):
                     with gr.Tabs():
                         with gr.Tab("🖼️ Detection"):
+                            vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
                         with gr.Tab("💾 JSON Feed"):
                             json_spot = gr.Code(label="JSON", language="json")
             btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
+    gr.Markdown("--- \n *May the indigenous languages of Mexico never be lost. Tachiwin Project.*")
 if __name__ == "__main__":
     demo.queue().launch()