Luis J Camargo committed on
Commit
4bdfa9b
·
1 Parent(s): 2ea14b2

refactor: Streamline PaddleOCR-VL pipeline setup using `paddlex` CLI for config generation and add new UI reference and existing OCR app files.

Browse files
Files changed (1) hide show
  1. app.py +96 -162
app.py CHANGED
@@ -7,6 +7,7 @@ import logging
7
  import sys
8
  import yaml
9
  import traceback
 
10
  from typing import Dict, List, Tuple, Any, Optional
11
  import time
12
 
@@ -21,7 +22,10 @@ logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging
21
  logger = logging.getLogger("TachiwinDocOCR")
22
 
23
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
24
- CONFIG_FILE = "custom_pipeline_config.yaml"
 
 
 
25
  OUTPUT_DIR = "output"
26
  os.makedirs(OUTPUT_DIR, exist_ok=True)
27
 
@@ -35,23 +39,16 @@ LATEX_DELIMS = [
35
 
36
  # --- Paddle imports and Diagnostic ---
37
  PADDLE_AVAILABLE = False
38
- PADDLEX_VERSION = "Unknown"
39
- PADDLEOCR_VERSION = "Unknown"
40
-
41
  try:
42
  import paddle
43
  import paddlex
44
- from paddlex import create_pipeline
45
  from paddleocr import PaddleOCRVL
46
  PADDLE_AVAILABLE = True
47
- PADDLEX_VERSION = getattr(paddlex, "__version__", "Unknown")
48
- logger.info(f"Paddle libraries loaded. PaddleX version: {PADDLEX_VERSION}")
49
  except ImportError as e:
50
  logger.error(f"Import Error: {e}")
51
- logger.error(traceback.format_exc())
52
  except Exception as e:
53
  logger.error(f"Unexpected error during import: {e}")
54
- logger.error(traceback.format_exc())
55
 
56
  # --- Model Initialization ---
57
  pipeline = None
@@ -65,85 +62,75 @@ def setup_pipeline():
65
  try:
66
  logger.info("Starting setup_pipeline...")
67
 
68
- # 1. Generate default config via CLI-like method to avoid early model download
69
- # We'll use create_pipeline and then export_pipeline_config, but we need to be careful
70
- # as create_pipeline might download the model immediately.
71
-
72
- # If the file exists, we'll read it. If not, we'll try to create a minimal one or use paddlex CLI.
73
- if not os.path.exists(CONFIG_FILE):
74
- logger.info(f"Generating default configuration for PaddleOCR-VL...")
75
- # Ideally: paddlex --get_pipeline_config PaddleOCR-VL
76
- # We can try to get it from paddlex registry if documented
77
  try:
78
- from paddlex.inference.pipelines import pipeline_registry
79
- # This is internal, but let's try to find if we can get the default dict
80
- logger.info(f"Registered pipelines: {list(pipeline_registry.keys())[:5]}...")
81
- except:
82
- pass
83
-
84
- # Fallback: Create a temporary pipeline to export config
85
- logger.info("Initializing a temporary pipeline to export default configuration...")
86
- temp_pipeline = create_pipeline("PaddleOCR-VL")
87
- temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
88
- logger.info(f"Default configuration exported to {CONFIG_FILE}")
 
 
 
 
 
 
 
 
89
 
90
  # 2. Load and Modify Config
91
- logger.info(f"Loading configuration from {CONFIG_FILE}")
92
- with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
93
  config_data = yaml.safe_load(f)
94
 
95
  logger.info("Modifying configuration with custom model path...")
96
 
97
- # Rigorous path search and modification
98
- modified = False
99
-
100
- # Check standard PaddleX structure
101
  if 'SubModules' in config_data:
102
- for sub_name, sub_cfg in config_data['SubModules'].items():
103
- if sub_name == 'VLRecognition':
104
- old_path = sub_cfg.get('model_dir')
105
- sub_cfg['model_dir'] = CUSTOM_MODEL_PATH
106
- logger.info(f"Success: Updated SubModules.VLRecognition.model_dir from '{old_path}' to '{CUSTOM_MODEL_PATH}'")
107
- modified = True
108
 
109
- if not modified:
110
- logger.warning("Standard SubModules.VLRecognition path not found. performing deep search...")
111
  def deep_update(d):
112
  count = 0
113
  for k, v in d.items():
114
  if k == 'VLRecognition' and isinstance(v, dict):
115
- old = v.get('model_dir')
116
  v['model_dir'] = CUSTOM_MODEL_PATH
117
- logger.info(f"Deep search found VLRecognition. Updated model_dir from '{old}' to '{CUSTOM_MODEL_PATH}'")
118
  count += 1
119
  elif isinstance(v, dict):
120
  count += deep_update(v)
121
  return count
122
-
123
- if deep_update(config_data) > 0:
124
- modified = True
 
 
 
125
 
126
- # Save modified config
127
- with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
128
  yaml.dump(config_data, f, default_flow_style=False)
129
 
130
- # 3. Log the final YAML to console as requested
131
- logger.info("--- FINAL YAML CONFIGURATION ---")
132
- yaml_str = yaml.dump(config_data, default_flow_style=False)
133
- print(yaml_str)
134
- logger.info("--- END FINAL YAML CONFIGURATION ---")
135
-
136
- # 4. Initialize pipeline with modified config
137
- logger.info(f"Initializing PaddleOCRVL with custom config file: {CONFIG_FILE}")
138
- # Note: We use PaddleOCRVL(pipeline_config=CONFIG_FILE) as per our research
139
- # If that fails, we can try create_pipeline(CONFIG_FILE)
140
- try:
141
- pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
142
- logger.info("Success: PaddleOCRVL initialized with custom config.")
143
- except Exception as e:
144
- logger.warning(f"PaddleOCRVL(pipeline_config=...) failed: {e}. Trying create_pipeline(path_to_yaml)...")
145
- pipeline = create_pipeline(CONFIG_FILE)
146
- logger.info("Success: Pipeline initialized using create_pipeline(CONFIG_FILE).")
147
 
148
  except Exception as e:
149
  logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
@@ -152,8 +139,6 @@ def setup_pipeline():
152
  # Initial setup
153
  if PADDLE_AVAILABLE:
154
  setup_pipeline()
155
- else:
156
- logger.error("Inference backend disabled: Paddle libraries not found.")
157
 
158
  # --- Helper Functions ---
159
 
@@ -208,29 +193,15 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
208
  # --- Inference Logic ---
209
 
210
  def run_inference(img_path, task_type="ocr"):
211
- status_msg = ""
212
- if not PADDLE_AVAILABLE:
213
- status_msg = "❌ Paddle libraries not installed."
214
- logger.error(status_msg)
215
- return status_msg, "", "", ""
216
-
217
- if pipeline is None:
218
- status_msg = "❌ Pipeline failed to initialize. Check logs for details."
219
- logger.error(status_msg)
220
- return status_msg, "", "", ""
221
 
222
  if not img_path:
223
- return "⚠️ Please upload an image first.", "", "", ""
224
 
225
  try:
226
  logger.info(f"--- Inference Start: {task_type} ---")
227
- logger.info(f"Image: {img_path}")
228
-
229
- start_time = time.time()
230
  output = pipeline.predict(img_path)
231
- end_time = time.time()
232
-
233
- logger.info(f"Inference completed in {end_time - start_time:.2f} seconds.")
234
 
235
  md_content = ""
236
  json_content = ""
@@ -241,47 +212,41 @@ def run_inference(img_path, task_type="ocr"):
241
  os.makedirs(run_output_dir, exist_ok=True)
242
 
243
  for i, res in enumerate(output):
244
- logger.info(f"Processing output segment {i+1}...")
245
  # Save results
246
  res.save_to_json(save_path=run_output_dir)
247
  res.save_to_markdown(save_path=run_output_dir)
248
-
249
- # Print to stdout
250
  res.print()
251
 
252
- # Read files back for Gradio
253
- files_found = os.listdir(run_output_dir)
254
- logger.info(f"Generated files: {files_found}")
255
-
256
- for file in files_found:
257
- fpath = os.path.join(run_output_dir, file)
258
- if file.endswith(".md"):
259
  with open(fpath, 'r', encoding='utf-8') as f:
260
  md_content += f.read() + "\n\n"
261
- elif file.endswith(".json"):
262
  with open(fpath, 'r', encoding='utf-8') as f:
263
  json_content += f.read() + "\n\n"
264
- elif file.endswith((".png", ".jpg", ".jpeg")) and ("res" in file or "vis" in file):
265
  vis_src = image_to_base64_data_url(fpath)
266
- vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background: white;">'
267
- vis_html += f'<div style="background: #10b981; color: white; padding: 5px 15px; font-weight: bold;">Visualization {i+1}</div>'
268
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
269
  vis_html += f'</div>'
270
 
271
  if not md_content:
272
- md_content = "⚠️ OCR finished but no text was extracted."
273
 
274
  md_preview = _escape_inequalities_in_math(md_content)
275
- logger.info("--- Inference Finished Successfully ---")
276
  return md_preview, md_content, vis_html, json_content
277
 
278
  except Exception as e:
279
- err_detail = traceback.format_exc()
280
  logger.error(f"Inference Error: {e}")
281
- logger.error(err_detail)
282
- return f"❌ Error: {str(e)}\n\nCheck logs for more details.", "", "", ""
283
 
284
  # --- UI Components ---
 
285
 
286
  custom_css = """
287
  body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
@@ -292,84 +257,64 @@ body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
292
  color: white;
293
  border-radius: 1.5rem;
294
  margin-bottom: 2rem;
295
- box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
296
  }
297
- .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; margin-bottom: 0.5rem; }
298
- .app-header p { font-size: 1.25rem; opacity: 0.95; }
299
  .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
300
- .quick-links { display: flex; justify-content: center; gap: 1.5rem; margin-bottom: 2rem; font-weight: 600; }
301
- .quick-links a { color: #0284c7; text-decoration: none; transition: color 0.2s; }
302
- .quick-links a:hover { color: #0369a1; text-decoration: underline; }
303
- .output-box { border-radius: 1rem !important; border: 1px solid #e2e8f0 !important; }
304
- .status-indicator { font-family: monospace; font-size: 0.875rem; color: #64748b; margin-top: 0.5rem; }
305
  """
306
 
307
  with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
308
- # Diagnostic Info
309
- gr.HTML(f"""<div style="display:none">Paddle Status: {PADDLE_AVAILABLE}, X: {PADDLEX_VERSION}</div>""")
310
-
311
- # Branding Header
312
  gr.HTML(
313
  """
314
  <div class="app-header">
315
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
316
- <p>Empowering the Indigenous Languages of Mexico through State-of-the-Art OCR</p>
317
  </div>
318
  """
319
  )
320
 
321
  with gr.Row(elem_classes=["notice"]):
322
- gr.Markdown(f"""
323
- **🚀 Engine Status:** Using **PaddleOCRVL 1.5** with custom weights: `{CUSTOM_MODEL_PATH}`.
324
- Supported Languages: 68 Official Mexican Indigenous Languages.
325
- """)
326
-
327
- with gr.Row(elem_classes=["quick-links"]):
328
- gr.HTML('<a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">💻 GitHub</a>')
329
- gr.HTML('<a href="https://huggingface.co/tachiwin/PaddleOCR-VL-Tachiwin-BF16" target="_blank">🤗 Model Repo</a>')
330
- gr.HTML('<a href="https://www.paddleocr.com" target="_blank">📚 Documentation</a>')
331
 
332
  with gr.Tabs():
333
- # --- Tab 1: Document Parsing ---
334
  with gr.Tab("📄 Full Document Parsing"):
335
  with gr.Row():
336
  with gr.Column(scale=5):
337
- file_doc = gr.File(label="Upload Image", file_count="single", type="filepath", file_types=["image"])
338
- preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
339
- with gr.Row(variant="panel"):
340
- btn_parse = gr.Button("🔍 Start Parsing", variant="primary", scale=2)
341
- with gr.Column(scale=1):
342
- chart_switch = gr.Checkbox(label="Chart OCR", value=True)
343
- unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
344
 
345
  with gr.Column(scale=7):
346
  with gr.Tabs():
347
  with gr.Tab("πŸ“ Markdown View"):
348
  md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
349
  with gr.Tab("🖼️ Visual Results"):
350
- vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Upload and parse to see visual results.</div>')
351
- with gr.Tab("📜 Markdown Source"):
352
  md_raw_doc = gr.Code(language="markdown")
353
 
354
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
355
 
356
  def parse_doc_wrapper(fp, ch, uw):
357
- return run_inference(fp, task_type="Document Parsing")[:3] # Returns Preview, Vis, Raw
 
358
 
359
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
360
 
361
- # --- Tab 2: Element Recognition ---
362
  with gr.Tab("🧩 Specific Recognition"):
363
  with gr.Row():
364
  with gr.Column(scale=5):
365
- file_vl = gr.File(label="Upload Element", file_count="single", type="filepath", file_types=["image"])
366
- preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
367
  with gr.Row():
368
  btn_ocr = gr.Button("Text OCR", variant="secondary")
369
- btn_formula = gr.Button("Math Formula", variant="secondary")
370
- with gr.Row():
371
- btn_table = gr.Button("Table Data", variant="secondary")
372
- btn_chart = gr.Button("Chart Data", variant="secondary")
373
 
374
  with gr.Column(scale=7):
375
  with gr.Tabs():
@@ -384,21 +329,21 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
384
  res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
385
  return res_preview, res_raw
386
 
387
- for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table"), (btn_chart, "Chart")]:
388
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
389
 
390
- # --- Tab 3: Spotting ---
391
  with gr.Tab("πŸ“ Feature Spotting"):
392
  with gr.Row():
393
  with gr.Column(scale=5):
394
- file_spot = gr.File(label="Target Image", file_count="single", type="filepath", file_types=["image"])
395
- preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
396
  btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
397
 
398
  with gr.Column(scale=7):
399
  with gr.Tabs():
400
  with gr.Tab("🖼️ Detection"):
401
- vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes will appear here.</div>')
402
  with gr.Tab("💾 JSON Feed"):
403
  json_spot = gr.Code(label="JSON", language="json")
404
 
@@ -410,18 +355,7 @@ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
410
 
411
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
412
 
413
- # Footer
414
- gr.Markdown(
415
- """
416
- ---
417
- ### 🌎 Tachiwin Project 🦑
418
- Dedicated to bridging the digital divide for the 68 officially recognized indigenous languages of Mexico.
419
-
420
- **Supported Families:** Uto-Aztecan, Mayan, Oto-Manguean, Totonac-Tepehua, Mixe-Zoque, and more.
421
-
422
- *Linguistic rights are human rights.*
423
- """
424
- )
425
 
426
  if __name__ == "__main__":
427
  demo.queue().launch()
 
7
  import sys
8
  import yaml
9
  import traceback
10
+ import subprocess
11
  from typing import Dict, List, Tuple, Any, Optional
12
  import time
13
 
 
22
  logger = logging.getLogger("TachiwinDocOCR")
23
 
24
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
25
+ # The CLI generated filename is usually {pipeline_name}.yaml
26
+ INTERNAL_CONFIG_FILE = "PaddleOCR-VL.yaml"
27
+ # Our final working file
28
+ FINAL_CONFIG_FILE = "custom_pipeline_config.yaml"
29
  OUTPUT_DIR = "output"
30
  os.makedirs(OUTPUT_DIR, exist_ok=True)
31
 
 
39
 
40
  # --- Paddle imports and Diagnostic ---
41
  PADDLE_AVAILABLE = False
 
 
 
42
  try:
43
  import paddle
44
  import paddlex
 
45
  from paddleocr import PaddleOCRVL
46
  PADDLE_AVAILABLE = True
47
+ logger.info(f"Paddle libraries loaded. PaddleX version: {getattr(paddlex, '__version__', 'Unknown')}")
 
48
  except ImportError as e:
49
  logger.error(f"Import Error: {e}")
 
50
  except Exception as e:
51
  logger.error(f"Unexpected error during import: {e}")
 
52
 
53
  # --- Model Initialization ---
54
  pipeline = None
 
62
  try:
63
  logger.info("Starting setup_pipeline...")
64
 
65
+ # 1. Generate default config via CLI
66
+ if not os.path.exists(FINAL_CONFIG_FILE):
67
+ logger.info("Generating default configuration via paddlex CLI...")
68
+ # Command: paddlex --get_pipeline_config PaddleOCR-VL --save_path ./
 
 
 
 
 
69
  try:
70
+ result = subprocess.run(
71
+ ["paddlex", "--get_pipeline_config", "PaddleOCR-VL", "--save_path", "./"],
72
+ capture_output=True, text=True, check=True
73
+ )
74
+ logger.info(f"CLI Output: {result.stdout}")
75
+ except subprocess.CalledProcessError as e:
76
+ logger.error(f"CLI Error: {e.stderr}")
77
+ # If CLI fails, we can't proceed with custom model easily without a template
78
+ raise e
79
+
80
+ # The file generated is likely PaddleOCR-VL.yaml
81
+ if os.path.exists(INTERNAL_CONFIG_FILE):
82
+ os.rename(INTERNAL_CONFIG_FILE, FINAL_CONFIG_FILE)
83
+ logger.info(f"Renamed {INTERNAL_CONFIG_FILE} to {FINAL_CONFIG_FILE}")
84
+ else:
85
+ logger.error(f"Expected config file {INTERNAL_CONFIG_FILE} was not found after CLI execution.")
86
+ # List files to see what was created
87
+ logger.info(f"Current directory files: {os.listdir('.')}")
88
+ raise FileNotFoundError(f"Config file {INTERNAL_CONFIG_FILE} not found.")
89
 
90
  # 2. Load and Modify Config
91
+ logger.info(f"Loading configuration from {FINAL_CONFIG_FILE}")
92
+ with open(FINAL_CONFIG_FILE, 'r', encoding='utf-8') as f:
93
  config_data = yaml.safe_load(f)
94
 
95
  logger.info("Modifying configuration with custom model path...")
96
 
97
+ # Search and update VLRecognition model_dir
98
+ updated = False
 
 
99
  if 'SubModules' in config_data:
100
+ if 'VLRecognition' in config_data['SubModules']:
101
+ config_data['SubModules']['VLRecognition']['model_dir'] = CUSTOM_MODEL_PATH
102
+ updated = True
 
 
 
103
 
104
+ if not updated:
105
+ # Deep search fallback
106
  def deep_update(d):
107
  count = 0
108
  for k, v in d.items():
109
  if k == 'VLRecognition' and isinstance(v, dict):
 
110
  v['model_dir'] = CUSTOM_MODEL_PATH
 
111
  count += 1
112
  elif isinstance(v, dict):
113
  count += deep_update(v)
114
  return count
115
+ updated = deep_update(config_data) > 0
116
+
117
+ if updated:
118
+ logger.info(f"Successfully updated VLRecognition model_dir to {CUSTOM_MODEL_PATH}")
119
+ else:
120
+ logger.warning("Could not find VLRecognition sub-module in the configuration to update its path.")
121
 
122
+ with open(FINAL_CONFIG_FILE, 'w', encoding='utf-8') as f:
 
123
  yaml.dump(config_data, f, default_flow_style=False)
124
 
125
+ # Log final YAML for verification
126
+ logger.info("--- UPDATED YAML CONFIG ---")
127
+ print(yaml.dump(config_data, default_flow_style=False))
128
+ logger.info("--- END UPDATED YAML ---")
129
+
130
+ # 3. Initialize pipeline
131
+ logger.info(f"Initializing PaddleOCRVL with config: {FINAL_CONFIG_FILE}")
132
+ pipeline = PaddleOCRVL(pipeline_config=FINAL_CONFIG_FILE)
133
+ logger.info("PaddleOCRVL initialized successfully.")
 
 
 
 
 
 
 
 
134
 
135
  except Exception as e:
136
  logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
 
139
  # Initial setup
140
  if PADDLE_AVAILABLE:
141
  setup_pipeline()
 
 
142
 
143
  # --- Helper Functions ---
144
 
 
193
  # --- Inference Logic ---
194
 
195
  def run_inference(img_path, task_type="ocr"):
196
+ if not PADDLE_AVAILABLE or pipeline is None:
197
+ return "❌ Paddle backend not available. Check initialization logs.", "", "", ""
 
 
 
 
 
 
 
 
198
 
199
  if not img_path:
200
+ return "⚠️ Please upload an image.", "", "", ""
201
 
202
  try:
203
  logger.info(f"--- Inference Start: {task_type} ---")
 
 
 
204
  output = pipeline.predict(img_path)
 
 
 
205
 
206
  md_content = ""
207
  json_content = ""
 
212
  os.makedirs(run_output_dir, exist_ok=True)
213
 
214
  for i, res in enumerate(output):
 
215
  # Save results
216
  res.save_to_json(save_path=run_output_dir)
217
  res.save_to_markdown(save_path=run_output_dir)
 
 
218
  res.print()
219
 
220
+ # Read back generated files
221
+ fnames = os.listdir(run_output_dir)
222
+ for fname in fnames:
223
+ fpath = os.path.join(run_output_dir, fname)
224
+ if fname.endswith(".md"):
 
 
225
  with open(fpath, 'r', encoding='utf-8') as f:
226
  md_content += f.read() + "\n\n"
227
+ elif fname.endswith(".json"):
228
  with open(fpath, 'r', encoding='utf-8') as f:
229
  json_content += f.read() + "\n\n"
230
+ elif fname.endswith((".png", ".jpg", ".jpeg")) and "res" in fname:
231
  vis_src = image_to_base64_data_url(fpath)
232
+ vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden;">'
 
233
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
234
  vis_html += f'</div>'
235
 
236
  if not md_content:
237
+ md_content = "⚠️ Finished but no content was recognized."
238
 
239
  md_preview = _escape_inequalities_in_math(md_content)
240
+ logger.info("--- Inference Finished ---")
241
  return md_preview, md_content, vis_html, json_content
242
 
243
  except Exception as e:
 
244
  logger.error(f"Inference Error: {e}")
245
+ logger.error(traceback.format_exc())
246
+ return f"❌ Error: {str(e)}", "", "", ""
247
 
248
  # --- UI Components ---
249
+ # (Keeping previous UI logic)
250
 
251
  custom_css = """
252
  body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
 
257
  color: white;
258
  border-radius: 1.5rem;
259
  margin-bottom: 2rem;
 
260
  }
261
+ .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; }
 
262
  .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
263
+ .output-box { border: 1px solid #e2e8f0 !important; border-radius: 1rem !important; }
 
 
 
 
264
  """
265
 
266
  with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
 
 
 
 
267
  gr.HTML(
268
  """
269
  <div class="app-header">
270
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
271
+ <p>Fine-tuned for the 68 Indigenous Languages of Mexico</p>
272
  </div>
273
  """
274
  )
275
 
276
  with gr.Row(elem_classes=["notice"]):
277
+ gr.Markdown(f"**Engine:** PaddleOCRVL 1.5 | **Model:** `{CUSTOM_MODEL_PATH}`")
 
 
 
 
 
 
 
 
278
 
279
  with gr.Tabs():
280
+ # Document Parsing Tab
281
  with gr.Tab("📄 Full Document Parsing"):
282
  with gr.Row():
283
  with gr.Column(scale=5):
284
+ file_doc = gr.File(label="Upload Image", type="filepath")
285
+ preview_doc_html = gr.HTML(visible=False)
286
+ btn_parse = gr.Button("οΏ½ Start Parsing", variant="primary")
287
+ with gr.Row():
288
+ chart_switch = gr.Checkbox(label="Chart OCR", value=True)
289
+ unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
290
 
291
  with gr.Column(scale=7):
292
  with gr.Tabs():
293
  with gr.Tab("πŸ“ Markdown View"):
294
  md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
295
  with gr.Tab("🖼️ Visual Results"):
296
+ vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Waiting for results...</div>')
297
+ with gr.Tab("📜 Raw Source"):
298
  md_raw_doc = gr.Code(language="markdown")
299
 
300
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
301
 
302
  def parse_doc_wrapper(fp, ch, uw):
303
+ res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
304
+ return res_preview, res_vis, res_raw
305
 
306
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
307
 
308
+ # Element Recognition Tab
309
  with gr.Tab("🧩 Specific Recognition"):
310
  with gr.Row():
311
  with gr.Column(scale=5):
312
+ file_vl = gr.File(label="Upload Element", type="filepath")
313
+ preview_vl_html = gr.HTML(visible=False)
314
  with gr.Row():
315
  btn_ocr = gr.Button("Text OCR", variant="secondary")
316
+ btn_formula = gr.Button("Formula", variant="secondary")
317
+ btn_table = gr.Button("Table", variant="secondary")
 
 
318
 
319
  with gr.Column(scale=7):
320
  with gr.Tabs():
 
329
  res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
330
  return res_preview, res_raw
331
 
332
+ for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
333
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
334
 
335
+ # Spotting Tab
336
  with gr.Tab("πŸ“ Feature Spotting"):
337
  with gr.Row():
338
  with gr.Column(scale=5):
339
+ file_spot = gr.File(label="Target Image", type="filepath")
340
+ preview_spot_html = gr.HTML(visible=False)
341
  btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
342
 
343
  with gr.Column(scale=7):
344
  with gr.Tabs():
345
  with gr.Tab("🖼️ Detection"):
346
+ vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
347
  with gr.Tab("💾 JSON Feed"):
348
  json_spot = gr.Code(label="JSON", language="json")
349
 
 
355
 
356
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
357
 
358
+ gr.Markdown("--- \n *May the indigenous languages of Mexico never be lost. Tachiwin Project.*")
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  if __name__ == "__main__":
361
  demo.queue().launch()