Luis J Camargo committed on
Commit
2ea14b2
·
1 Parent(s): 58fd993

refactor: Improve PaddleOCR pipeline setup, configuration, and error handling in `app.py` and add new UI-related reference modules.

Browse files
Files changed (1) hide show
  1. app.py +210 -148
app.py CHANGED
@@ -6,6 +6,7 @@ import re
6
  import logging
7
  import sys
8
  import yaml
 
9
  from typing import Dict, List, Tuple, Any, Optional
10
  import time
11
 
@@ -14,18 +15,9 @@ from PIL import Image
14
  import requests
15
  from urllib.parse import urlparse
16
 
17
- # Paddle imports
18
- try:
19
- from paddleocr import PaddleOCRVL
20
- import paddlex
21
- PADDLE_AVAILABLE = True
22
- except ImportError:
23
- PADDLE_AVAILABLE = False
24
- print("Warning: paddleocr or paddlex not found. Inference will be disabled.")
25
-
26
  # --- Configuration ---
27
- LOGGING_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
28
- logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging.StreamHandler(sys.stderr)])
29
  logger = logging.getLogger("TachiwinDocOCR")
30
 
31
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
@@ -41,61 +33,127 @@ LATEX_DELIMS = [
41
  {"left": "\\[", "right": "\\]", "display": True},
42
  ]
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # --- Model Initialization ---
45
  pipeline = None
46
 
47
  def setup_pipeline():
48
  global pipeline
49
  if not PADDLE_AVAILABLE:
 
50
  return
51
 
52
  try:
53
- # 1. Generate default config if it doesn't exist
 
 
 
 
 
 
54
  if not os.path.exists(CONFIG_FILE):
55
- logger.info(f"Generating default configuration file: {CONFIG_FILE}")
56
- # Note: Using the internal paddlex API to get the config
57
- # Equivalent to: paddlex --get_pipeline_config PaddleOCR-VL
58
- from paddlex import create_pipeline
 
 
 
 
 
 
 
 
59
  temp_pipeline = create_pipeline("PaddleOCR-VL")
60
  temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
61
- logger.info("Default configuration exported.")
62
 
63
- # 2. Modify config to point to custom model
64
- logger.info(f"Modifying configuration to use custom model: {CUSTOM_MODEL_PATH}")
65
  with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
66
  config_data = yaml.safe_load(f)
67
 
68
- # Update the model_dir for VLRecognition
69
- # Heuristic: Find and update VLRecognition model_dir
70
- if 'SubModules' in config_data and 'VLRecognition' in config_data['SubModules']:
71
- config_data['SubModules']['VLRecognition']['model_dir'] = CUSTOM_MODEL_PATH
72
- logger.info(f"Updated VLRecognition model_dir to {CUSTOM_MODEL_PATH}")
73
- else:
74
- logger.warning("Could not find VLRecognition in config_data['SubModules']. Attempting fallback.")
75
- # Fallback searching through the dict if structure is different
76
- def update_model_dir(d):
 
 
 
 
 
 
 
 
 
77
  for k, v in d.items():
78
  if k == 'VLRecognition' and isinstance(v, dict):
 
79
  v['model_dir'] = CUSTOM_MODEL_PATH
80
- return True
81
- if isinstance(v, dict):
82
- if update_model_dir(v): return True
83
- return False
84
- update_model_dir(config_data)
 
 
 
85
 
 
86
  with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
87
- yaml.dump(config_data, f)
88
 
89
- # 3. Initialize pipeline with modified config
90
- logger.info(f"Initializing PaddleOCRVL with config: {CONFIG_FILE}")
91
- pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
92
- logger.info("PaddleOCRVL initialized successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  except Exception as e:
95
- logger.error(f"Failed to initialize PaddleOCRVL: {e}")
 
96
 
 
97
  if PADDLE_AVAILABLE:
98
  setup_pipeline()
 
 
99
 
100
  # --- Helper Functions ---
101
 
@@ -116,7 +174,7 @@ def image_to_base64_data_url(filepath: str) -> str:
116
 
117
  def _escape_inequalities_in_math(md: str) -> str:
118
  _MATH_PATTERNS = [
119
- re.compile(r"\$\$([\s\S]+?)\$\$"),
120
  re.compile(r"\$([^\$]+?)\$"),
121
  re.compile(r"\\\[([\s\S]+?)\\\]"),
122
  re.compile(r"\\\(([\s\S]+?)\\\)"),
@@ -141,8 +199,8 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
141
  src = image_to_base64_data_url(path_or_url)
142
 
143
  html_content = f"""
144
- <div class="uploaded-image">
145
- <img src="{src}" alt="Preview image" style="width:100%;height:100%;object-fit:contain;" loading="lazy"/>
146
  </div>
147
  """
148
  return gr.update(value=html_content, visible=True)
@@ -150,214 +208,218 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
150
  # --- Inference Logic ---
151
 
152
  def run_inference(img_path, task_type="ocr"):
153
- if not PADDLE_AVAILABLE or pipeline is None:
154
- return "PaddleOCRVL is not available or failed to load. Ensure paddlex and paddleocr are installed.", "", "", ""
 
 
 
 
 
 
 
 
155
 
156
  if not img_path:
157
- return "Please upload an image.", "", "", ""
158
 
159
  try:
160
- logger.info(f"Running inference for {img_path} with task {task_type}")
 
161
 
162
- # Adjust pipeline parameters based on task_type if needed
163
- # PaddleOCRVL predict as per documentation
164
  output = pipeline.predict(img_path)
 
 
 
165
 
166
  md_content = ""
167
  json_content = ""
168
  vis_html = ""
169
 
170
- run_id = str(int(time.time()))
171
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
172
  os.makedirs(run_output_dir, exist_ok=True)
173
 
174
  for i, res in enumerate(output):
175
- # Save outputs
 
176
  res.save_to_json(save_path=run_output_dir)
177
  res.save_to_markdown(save_path=run_output_dir)
178
 
179
- # Print for logs
180
  res.print()
181
 
182
- # Extract content from generated files
183
- for root, dirs, files in os.walk(run_output_dir):
184
- for file in files:
185
- file_full_path = os.path.join(root, file)
186
- if file.endswith(".md"):
187
- with open(file_full_path, 'r', encoding='utf-8') as f:
188
- md_content += f.read() + "\n\n"
189
- elif file.endswith(".json"):
190
- with open(file_full_path, 'r', encoding='utf-8') as f:
191
- json_content += f.read() + "\n\n"
192
- elif file.endswith((".png", ".jpg", ".jpeg")) and "res" in file:
193
- # Found a visualization image
194
- vis_src = image_to_base64_data_url(file_full_path)
195
- vis_html += f'<div style="margin-bottom:20px;">'
196
- vis_html += f'<p style="font-weight:bold;">Visualization {i+1}:</p>'
197
- vis_html += f'<img src="{vis_src}" alt="Visualization {i+1}" style="width:100%; border-radius: 8px; border: 1px solid #ddd;">'
198
- vis_html += f'</div>'
 
199
 
200
  if not md_content:
201
- md_content = "No text recognized."
202
 
203
  md_preview = _escape_inequalities_in_math(md_content)
204
-
205
  return md_preview, md_content, vis_html, json_content
206
 
207
  except Exception as e:
208
- logger.error(f"Inference failed: {e}")
209
- return f"Error: {str(e)}", "", "", ""
 
 
210
 
211
  # --- UI Components ---
212
 
213
- css = """
214
- body, .gradio-container { font-family: 'Inter', -apple-system, system-ui, sans-serif; }
215
  .app-header {
216
  text-align: center;
217
- padding: 30px;
218
- background: linear-gradient(120deg, rgb(2, 132, 199) 0%, rgb(16, 185, 129) 60%, rgb(5, 150, 105) 100%);
219
  color: white;
220
- border-radius: 15px;
221
- margin-bottom: 25px;
222
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
223
  }
224
- .app-header h1 { color: white !important; margin: 0; font-size: 2.5em; }
225
- .app-header p { font-size: 1.2em; opacity: 0.9; margin-top: 10px; }
226
- .notice { margin: 8px auto 0; max-width: 900px; padding: 10px 12px; border: 1px solid #e5e7eb; border-radius: 8px; background: #f8fafc; font-size: 14px; line-height: 1.6; }
227
- .quick-links { text-align: center; padding: 8px 0; border: 1px solid #e5e7eb; border-radius: 8px; margin: 8px auto; max-width: 900px; }
228
- .quick-links a { margin: 0 12px; font-size: 14px; font-weight: 600; color: #3b82f6; text-decoration: none; }
229
- .quick-links a:hover { text-decoration: underline; }
230
- #image_preview_doc, #image_preview_vl, #image_preview_spot { height: 400px !important; overflow: auto; border: 1px solid #ddd; border-radius: 8px; background: #eee; }
231
- #image_preview_doc img, #image_preview_vl img, #image_preview_spot img { width: 100% !important; height: auto !important; object-fit: contain !important; display: block; }
232
- .output_markdown { min-height: 30rem !important; font-size: 1.1rem !important; line-height: 1.6 !important; }
233
- .prose pre { background: #f1f5f9 !important; border-radius: 8px !important; padding: 10px !important; }
234
  """
235
 
236
- with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
237
- # Header branding
 
 
 
238
  gr.HTML(
239
  """
240
  <div class="app-header">
241
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
242
- <p>Advancing Linguistic Rights for the 68 Indigenous Languages of Mexico</p>
243
  </div>
244
  """
245
  )
246
 
247
- gr.HTML(f"""
248
- <div class="notice">
249
- <strong>Powered by PaddleOCRVL 1.5:</strong> Optimized for in-the-wild document parsing and fine-tuned for indigenous languages.
250
- Initializing with custom weights: <code>{CUSTOM_MODEL_PATH}</code>
251
- </div>
252
- """)
253
 
254
- gr.HTML("""<div class="quick-links"><a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">GitHub</a> | <a href="https://huggingface.co/PaddlePaddle/PaddleOCR-VL-1.5" target="_blank">Base Model</a> | <a href="https://www.paddleocr.com" target="_blank">Documentation</a></div>""")
 
 
 
255
 
256
  with gr.Tabs():
257
  # --- Tab 1: Document Parsing ---
258
- with gr.Tab("πŸ“„ Document Parsing"):
259
  with gr.Row():
260
  with gr.Column(scale=5):
261
- file_doc = gr.File(label="Upload Document Image", file_count="single", type="filepath", file_types=["image"])
262
  preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
263
  with gr.Row(variant="panel"):
264
- with gr.Column(scale=2):
265
- btn_parse = gr.Button("πŸš€ Parse Document", variant="primary")
266
- with gr.Column(scale=3):
267
- with gr.Row():
268
- chart_switch = gr.Checkbox(label="Chart parsing", value=True)
269
- unwarp_switch = gr.Checkbox(label="Doc unwarping", value=False)
270
 
271
  with gr.Column(scale=7):
272
  with gr.Tabs():
273
- with gr.Tab("πŸ“ Markdown Preview"):
274
- md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output_markdown")
275
- with gr.Tab("πŸ–ΌοΈ Visualization"):
276
- vis_image_doc = gr.HTML("<p style='text-align:center; color:#888; padding: 20px;'>Parsing results will be visualized here.</p>")
277
  with gr.Tab("πŸ“œ Markdown Source"):
278
  md_raw_doc = gr.Code(language="markdown")
279
 
280
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
281
 
282
  def parse_doc_wrapper(fp, ch, uw):
283
- if not fp: return "Please upload an image.", "", "", ""
284
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="document")
285
- return res_preview, res_vis, res_raw
286
 
287
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
288
 
289
  # --- Tab 2: Element Recognition ---
290
- with gr.Tab("🧩 Element Recognition"):
291
  with gr.Row():
292
  with gr.Column(scale=5):
293
- file_vl = gr.File(label="Upload Element Image", file_count="single", type="filepath", file_types=["image"])
294
  preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
295
  with gr.Row():
296
- btn_ocr = gr.Button("Text Recognition", variant="secondary")
297
- btn_formula = gr.Button("Formula Recognition", variant="secondary")
298
  with gr.Row():
299
- btn_table = gr.Button("Table Recognition", variant="secondary")
300
- btn_chart = gr.Button("Chart Recognition", variant="secondary")
301
 
302
  with gr.Column(scale=7):
303
  with gr.Tabs():
304
  with gr.Tab("πŸ“Š Result"):
305
- md_preview_vl = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output_markdown")
306
- with gr.Tab("πŸ“œ Raw Output"):
307
  md_raw_vl = gr.Code(language="markdown")
308
 
309
  file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
310
 
311
  def run_vl_wrapper(fp, prompt):
312
- if not fp: return "Please upload an image.", "", ""
313
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type=prompt)
314
  return res_preview, res_raw
315
 
316
- for btn, prompt in [(btn_ocr, "Text Recognition"), (btn_formula, "Formula Recognition"), (btn_table, "Table Recognition"), (btn_chart, "Chart Recognition")]:
317
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
318
 
319
  # --- Tab 3: Spotting ---
320
- with gr.Tab("πŸ“ Spotting"):
321
  with gr.Row():
322
  with gr.Column(scale=5):
323
- file_spot = gr.File(label="Upload Image for Detection", file_count="single", type="filepath", file_types=["image"])
324
  preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
325
- btn_run_spot = gr.Button("Run Spotting", variant="primary")
326
 
327
  with gr.Column(scale=7):
328
  with gr.Tabs():
329
- with gr.Tab("πŸ–ΌοΈ Visualization"):
330
- vis_image_spot = gr.HTML("<p style='text-align:center; color:#888; padding: 20px;'>Detection visualization.</p>")
331
- with gr.Tab("πŸ’Ύ JSON Result"):
332
- json_spot = gr.Code(label="Detection Results", language="json")
333
 
334
  file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
335
 
336
  def run_spotting_wrapper(fp):
337
- if not fp: return "", ""
338
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="spotting")
339
- return res_vis, res_json
340
 
341
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
342
 
343
- # Footer Information
344
  gr.Markdown(
345
  """
346
  ---
347
- ### ℹ️ About Tachiwin 🦑
348
-
349
- **Tachiwin** (from Totonac - "Language") is dedicated to bridging the digital divide for indigenous languages of Mexico through AI technology. This model represents a **world first in tech access and linguistic rights**, specifically trained to recognize the 68 indigenous languages of Mexico.
350
-
351
- ### Supported Language Families
352
 
353
- **Uto-Aztecan:** NΓ‘huatl, Yaqui, Mayo, Huichol, TepehuΓ‘n, Tarahumara
354
- **Mayan:** Maya, Tzeltal, Tzotzil, Chol, Tojolabal, Q'anjob'al, Mam
355
- **Oto-Manguean:** Zapoteco, Mixteco, OtomΓ­, Mazateco, Chinanteco, Triqui
356
- **Totonac-Tepehua:** Totonaco, Tepehua
357
- **Mixe-Zoque:** Mixe, Zoque, Popoluca
358
- **Other:** PurΓ©pecha, Huave, Seri, Kickapoo, Kiliwa
359
 
360
- Made with ❀️ for linguistic diversity and indigenous rights 🦑
361
  """
362
  )
363
 
 
6
  import logging
7
  import sys
8
  import yaml
9
+ import traceback
10
  from typing import Dict, List, Tuple, Any, Optional
11
  import time
12
 
 
15
  import requests
16
  from urllib.parse import urlparse
17
 
 
 
 
 
 
 
 
 
 
18
  # --- Configuration ---
19
+ LOGGING_FORMAT = '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
20
+ logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging.StreamHandler(sys.stdout)])
21
  logger = logging.getLogger("TachiwinDocOCR")
22
 
23
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
 
33
  {"left": "\\[", "right": "\\]", "display": True},
34
  ]
35
 
36
+ # --- Paddle imports and Diagnostic ---
37
+ PADDLE_AVAILABLE = False
38
+ PADDLEX_VERSION = "Unknown"
39
+ PADDLEOCR_VERSION = "Unknown"
40
+
41
+ try:
42
+ import paddle
43
+ import paddlex
44
+ from paddlex import create_pipeline
45
+ from paddleocr import PaddleOCRVL
46
+ PADDLE_AVAILABLE = True
47
+ PADDLEX_VERSION = getattr(paddlex, "__version__", "Unknown")
48
+ logger.info(f"Paddle libraries loaded. PaddleX version: {PADDLEX_VERSION}")
49
+ except ImportError as e:
50
+ logger.error(f"Import Error: {e}")
51
+ logger.error(traceback.format_exc())
52
+ except Exception as e:
53
+ logger.error(f"Unexpected error during import: {e}")
54
+ logger.error(traceback.format_exc())
55
+
56
  # --- Model Initialization ---
57
  pipeline = None
58
 
59
  def setup_pipeline():
60
  global pipeline
61
  if not PADDLE_AVAILABLE:
62
+ logger.error("Skipping pipeline setup because Paddle is not available.")
63
  return
64
 
65
  try:
66
+ logger.info("Starting setup_pipeline...")
67
+
68
+ # 1. Generate default config via CLI-like method to avoid early model download
69
+ # We'll use create_pipeline and then export_pipeline_config, but we need to be careful
70
+ # as create_pipeline might download the model immediately.
71
+
72
+ # If the file exists, we'll read it. If not, we'll try to create a minimal one or use paddlex CLI.
73
  if not os.path.exists(CONFIG_FILE):
74
+ logger.info(f"Generating default configuration for PaddleOCR-VL...")
75
+ # Ideally: paddlex --get_pipeline_config PaddleOCR-VL
76
+ # We can try to get it from paddlex registry if documented
77
+ try:
78
+ from paddlex.inference.pipelines import pipeline_registry
79
+ # This is internal, but let's try to find if we can get the default dict
80
+ logger.info(f"Registered pipelines: {list(pipeline_registry.keys())[:5]}...")
81
+ except:
82
+ pass
83
+
84
+ # Fallback: Create a temporary pipeline to export config
85
+ logger.info("Initializing a temporary pipeline to export default configuration...")
86
  temp_pipeline = create_pipeline("PaddleOCR-VL")
87
  temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
88
+ logger.info(f"Default configuration exported to {CONFIG_FILE}")
89
 
90
+ # 2. Load and Modify Config
91
+ logger.info(f"Loading configuration from {CONFIG_FILE}")
92
  with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
93
  config_data = yaml.safe_load(f)
94
 
95
+ logger.info("Modifying configuration with custom model path...")
96
+
97
+ # Rigorous path search and modification
98
+ modified = False
99
+
100
+ # Check standard PaddleX structure
101
+ if 'SubModules' in config_data:
102
+ for sub_name, sub_cfg in config_data['SubModules'].items():
103
+ if sub_name == 'VLRecognition':
104
+ old_path = sub_cfg.get('model_dir')
105
+ sub_cfg['model_dir'] = CUSTOM_MODEL_PATH
106
+ logger.info(f"Success: Updated SubModules.VLRecognition.model_dir from '{old_path}' to '{CUSTOM_MODEL_PATH}'")
107
+ modified = True
108
+
109
+ if not modified:
110
+ logger.warning("Standard SubModules.VLRecognition path not found. performing deep search...")
111
+ def deep_update(d):
112
+ count = 0
113
  for k, v in d.items():
114
  if k == 'VLRecognition' and isinstance(v, dict):
115
+ old = v.get('model_dir')
116
  v['model_dir'] = CUSTOM_MODEL_PATH
117
+ logger.info(f"Deep search found VLRecognition. Updated model_dir from '{old}' to '{CUSTOM_MODEL_PATH}'")
118
+ count += 1
119
+ elif isinstance(v, dict):
120
+ count += deep_update(v)
121
+ return count
122
+
123
+ if deep_update(config_data) > 0:
124
+ modified = True
125
 
126
+ # Save modified config
127
  with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
128
+ yaml.dump(config_data, f, default_flow_style=False)
129
 
130
+ # 3. Log the final YAML to console as requested
131
+ logger.info("--- FINAL YAML CONFIGURATION ---")
132
+ yaml_str = yaml.dump(config_data, default_flow_style=False)
133
+ print(yaml_str)
134
+ logger.info("--- END FINAL YAML CONFIGURATION ---")
135
+
136
+ # 4. Initialize pipeline with modified config
137
+ logger.info(f"Initializing PaddleOCRVL with custom config file: {CONFIG_FILE}")
138
+ # Note: We use PaddleOCRVL(pipeline_config=CONFIG_FILE) as per our research
139
+ # If that fails, we can try create_pipeline(CONFIG_FILE)
140
+ try:
141
+ pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
142
+ logger.info("Success: PaddleOCRVL initialized with custom config.")
143
+ except Exception as e:
144
+ logger.warning(f"PaddleOCRVL(pipeline_config=...) failed: {e}. Trying create_pipeline(path_to_yaml)...")
145
+ pipeline = create_pipeline(CONFIG_FILE)
146
+ logger.info("Success: Pipeline initialized using create_pipeline(CONFIG_FILE).")
147
 
148
  except Exception as e:
149
+ logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
150
+ logger.error(traceback.format_exc())
151
 
152
+ # Initial setup
153
  if PADDLE_AVAILABLE:
154
  setup_pipeline()
155
+ else:
156
+ logger.error("Inference backend disabled: Paddle libraries not found.")
157
 
158
  # --- Helper Functions ---
159
 
 
174
 
175
  def _escape_inequalities_in_math(md: str) -> str:
176
  _MATH_PATTERNS = [
177
+ re.compile(r"\$$([\s\S]+?)\$$"),
178
  re.compile(r"\$([^\$]+?)\$"),
179
  re.compile(r"\\\[([\s\S]+?)\\\]"),
180
  re.compile(r"\\\(([\s\S]+?)\\\)"),
 
199
  src = image_to_base64_data_url(path_or_url)
200
 
201
  html_content = f"""
202
+ <div class="uploaded-image" style="background: white; padding: 10px; border-radius: 8px;">
203
+ <img src="{src}" alt="Preview" style="width:100%; height:auto; max-height:800px; object-fit:contain;"/>
204
  </div>
205
  """
206
  return gr.update(value=html_content, visible=True)
 
208
  # --- Inference Logic ---
209
 
210
  def run_inference(img_path, task_type="ocr"):
211
+ status_msg = ""
212
+ if not PADDLE_AVAILABLE:
213
+ status_msg = "❌ Paddle libraries not installed."
214
+ logger.error(status_msg)
215
+ return status_msg, "", "", ""
216
+
217
+ if pipeline is None:
218
+ status_msg = "❌ Pipeline failed to initialize. Check logs for details."
219
+ logger.error(status_msg)
220
+ return status_msg, "", "", ""
221
 
222
  if not img_path:
223
+ return "⚠️ Please upload an image first.", "", "", ""
224
 
225
  try:
226
+ logger.info(f"--- Inference Start: {task_type} ---")
227
+ logger.info(f"Image: {img_path}")
228
 
229
+ start_time = time.time()
 
230
  output = pipeline.predict(img_path)
231
+ end_time = time.time()
232
+
233
+ logger.info(f"Inference completed in {end_time - start_time:.2f} seconds.")
234
 
235
  md_content = ""
236
  json_content = ""
237
  vis_html = ""
238
 
239
+ run_id = f"run_{int(time.time())}"
240
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
241
  os.makedirs(run_output_dir, exist_ok=True)
242
 
243
  for i, res in enumerate(output):
244
+ logger.info(f"Processing output segment {i+1}...")
245
+ # Save results
246
  res.save_to_json(save_path=run_output_dir)
247
  res.save_to_markdown(save_path=run_output_dir)
248
 
249
+ # Print to stdout
250
  res.print()
251
 
252
+ # Read files back for Gradio
253
+ files_found = os.listdir(run_output_dir)
254
+ logger.info(f"Generated files: {files_found}")
255
+
256
+ for file in files_found:
257
+ fpath = os.path.join(run_output_dir, file)
258
+ if file.endswith(".md"):
259
+ with open(fpath, 'r', encoding='utf-8') as f:
260
+ md_content += f.read() + "\n\n"
261
+ elif file.endswith(".json"):
262
+ with open(fpath, 'r', encoding='utf-8') as f:
263
+ json_content += f.read() + "\n\n"
264
+ elif file.endswith((".png", ".jpg", ".jpeg")) and ("res" in file or "vis" in file):
265
+ vis_src = image_to_base64_data_url(fpath)
266
+ vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background: white;">'
267
+ vis_html += f'<div style="background: #10b981; color: white; padding: 5px 15px; font-weight: bold;">Visualization {i+1}</div>'
268
+ vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
269
+ vis_html += f'</div>'
270
 
271
  if not md_content:
272
+ md_content = "⚠️ OCR finished but no text was extracted."
273
 
274
  md_preview = _escape_inequalities_in_math(md_content)
275
+ logger.info("--- Inference Finished Successfully ---")
276
  return md_preview, md_content, vis_html, json_content
277
 
278
  except Exception as e:
279
+ err_detail = traceback.format_exc()
280
+ logger.error(f"Inference Error: {e}")
281
+ logger.error(err_detail)
282
+ return f"❌ Error: {str(e)}\n\nCheck logs for more details.", "", "", ""
283
 
284
  # --- UI Components ---
285
 
286
+ custom_css = """
287
+ body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
288
  .app-header {
289
  text-align: center;
290
+ padding: 2.5rem;
291
+ background: linear-gradient(135deg, #0284c7 0%, #10b981 100%);
292
  color: white;
293
+ border-radius: 1.5rem;
294
+ margin-bottom: 2rem;
295
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
296
  }
297
+ .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; margin-bottom: 0.5rem; }
298
+ .app-header p { font-size: 1.25rem; opacity: 0.95; }
299
+ .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
300
+ .quick-links { display: flex; justify-content: center; gap: 1.5rem; margin-bottom: 2rem; font-weight: 600; }
301
+ .quick-links a { color: #0284c7; text-decoration: none; transition: color 0.2s; }
302
+ .quick-links a:hover { color: #0369a1; text-decoration: underline; }
303
+ .output-box { border-radius: 1rem !important; border: 1px solid #e2e8f0 !important; }
304
+ .status-indicator { font-family: monospace; font-size: 0.875rem; color: #64748b; margin-top: 0.5rem; }
 
 
305
  """
306
 
307
+ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
308
+ # Diagnostic Info
309
+ gr.HTML(f"""<div style="display:none">Paddle Status: {PADDLE_AVAILABLE}, X: {PADDLEX_VERSION}</div>""")
310
+
311
+ # Branding Header
312
  gr.HTML(
313
  """
314
  <div class="app-header">
315
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
316
+ <p>Empowering the Indigenous Languages of Mexico through State-of-the-Art OCR</p>
317
  </div>
318
  """
319
  )
320
 
321
+ with gr.Row(elem_classes=["notice"]):
322
+ gr.Markdown(f"""
323
+ **πŸš€ Engine Status:** Using **PaddleOCRVL 1.5** with custom weights: `{CUSTOM_MODEL_PATH}`.
324
+ Supported Languages: 68 Official Mexican Indigenous Languages.
325
+ """)
 
326
 
327
+ with gr.Row(elem_classes=["quick-links"]):
328
+ gr.HTML('<a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">πŸ’» GitHub</a>')
329
+ gr.HTML('<a href="https://huggingface.co/tachiwin/PaddleOCR-VL-Tachiwin-BF16" target="_blank">πŸ€— Model Repo</a>')
330
+ gr.HTML('<a href="https://www.paddleocr.com" target="_blank">πŸ“š Documentation</a>')
331
 
332
  with gr.Tabs():
333
  # --- Tab 1: Document Parsing ---
334
+ with gr.Tab("πŸ“„ Full Document Parsing"):
335
  with gr.Row():
336
  with gr.Column(scale=5):
337
+ file_doc = gr.File(label="Upload Image", file_count="single", type="filepath", file_types=["image"])
338
  preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
339
  with gr.Row(variant="panel"):
340
+ btn_parse = gr.Button("πŸ” Start Parsing", variant="primary", scale=2)
341
+ with gr.Column(scale=1):
342
+ chart_switch = gr.Checkbox(label="Chart OCR", value=True)
343
+ unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
 
344
 
345
  with gr.Column(scale=7):
346
  with gr.Tabs():
347
+ with gr.Tab("πŸ“ Markdown View"):
348
+ md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
349
+ with gr.Tab("πŸ–ΌοΈ Visual Results"):
350
+ vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Upload and parse to see visual results.</div>')
351
  with gr.Tab("πŸ“œ Markdown Source"):
352
  md_raw_doc = gr.Code(language="markdown")
353
 
354
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
355
 
356
  def parse_doc_wrapper(fp, ch, uw):
357
+ return run_inference(fp, task_type="Document Parsing")[:3] # Returns Preview, Vis, Raw
 
 
358
 
359
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
360
 
361
  # --- Tab 2: Element Recognition ---
362
+ with gr.Tab("🧩 Specific Recognition"):
363
  with gr.Row():
364
  with gr.Column(scale=5):
365
+ file_vl = gr.File(label="Upload Element", file_count="single", type="filepath", file_types=["image"])
366
  preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
367
  with gr.Row():
368
+ btn_ocr = gr.Button("Text OCR", variant="secondary")
369
+ btn_formula = gr.Button("Math Formula", variant="secondary")
370
  with gr.Row():
371
+ btn_table = gr.Button("Table Data", variant="secondary")
372
+ btn_chart = gr.Button("Chart Data", variant="secondary")
373
 
374
  with gr.Column(scale=7):
375
  with gr.Tabs():
376
  with gr.Tab("πŸ“Š Result"):
377
+ md_preview_vl = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
378
+ with gr.Tab("πŸ“œ Source"):
379
  md_raw_vl = gr.Code(language="markdown")
380
 
381
  file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
382
 
383
  def run_vl_wrapper(fp, prompt):
384
+ res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
 
385
  return res_preview, res_raw
386
 
387
+ for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table"), (btn_chart, "Chart")]:
388
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
389
 
390
  # --- Tab 3: Spotting ---
391
+ with gr.Tab("πŸ“ Feature Spotting"):
392
  with gr.Row():
393
  with gr.Column(scale=5):
394
+ file_spot = gr.File(label="Target Image", file_count="single", type="filepath", file_types=["image"])
395
  preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
396
+ btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
397
 
398
  with gr.Column(scale=7):
399
  with gr.Tabs():
400
+ with gr.Tab("πŸ–ΌοΈ Detection"):
401
+ vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes will appear here.</div>')
402
+ with gr.Tab("πŸ’Ύ JSON Feed"):
403
+ json_spot = gr.Code(label="JSON", language="json")
404
 
405
  file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
406
 
407
  def run_spotting_wrapper(fp):
408
+ _, _, vis, js = run_inference(fp, task_type="Spotting")
409
+ return vis, js
 
410
 
411
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
412
 
413
+ # Footer
414
  gr.Markdown(
415
  """
416
  ---
417
+ ### 🌎 Tachiwin Project 🦑
418
+ Dedicated to bridging the digital divide for the 68 officially recognized indigenous languages of Mexico.
 
 
 
419
 
420
+ **Supported Families:** Uto-Aztecan, Mayan, Oto-Manguean, Totonac-Tepehua, Mixe-Zoque, and more.
 
 
 
 
 
421
 
422
+ *Linguistic rights are human rights.*
423
  """
424
  )
425