Luis J Camargo committed on
Commit
2ea14b2
·
1 Parent(s): 58fd993

refactor: Improve PaddleOCR pipeline setup, configuration, and error handling in `app.py` and add new UI-related reference modules.

Browse files
Files changed (1) hide show
  1. app.py +210 -148
app.py CHANGED
@@ -6,6 +6,7 @@ import re
6
  import logging
7
  import sys
8
  import yaml
 
9
  from typing import Dict, List, Tuple, Any, Optional
10
  import time
11
 
@@ -14,18 +15,9 @@ from PIL import Image
14
  import requests
15
  from urllib.parse import urlparse
16
 
17
- # Paddle imports
18
- try:
19
- from paddleocr import PaddleOCRVL
20
- import paddlex
21
- PADDLE_AVAILABLE = True
22
- except ImportError:
23
- PADDLE_AVAILABLE = False
24
- print("Warning: paddleocr or paddlex not found. Inference will be disabled.")
25
-
26
  # --- Configuration ---
27
- LOGGING_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
28
- logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging.StreamHandler(sys.stderr)])
29
  logger = logging.getLogger("TachiwinDocOCR")
30
 
31
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
@@ -41,61 +33,127 @@ LATEX_DELIMS = [
41
  {"left": "\\[", "right": "\\]", "display": True},
42
  ]
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # --- Model Initialization ---
45
  pipeline = None
46
 
47
  def setup_pipeline():
48
  global pipeline
49
  if not PADDLE_AVAILABLE:
 
50
  return
51
 
52
  try:
53
- # 1. Generate default config if it doesn't exist
 
 
 
 
 
 
54
  if not os.path.exists(CONFIG_FILE):
55
- logger.info(f"Generating default configuration file: {CONFIG_FILE}")
56
- # Note: Using the internal paddlex API to get the config
57
- # Equivalent to: paddlex --get_pipeline_config PaddleOCR-VL
58
- from paddlex import create_pipeline
 
 
 
 
 
 
 
 
59
  temp_pipeline = create_pipeline("PaddleOCR-VL")
60
  temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
61
- logger.info("Default configuration exported.")
62
 
63
- # 2. Modify config to point to custom model
64
- logger.info(f"Modifying configuration to use custom model: {CUSTOM_MODEL_PATH}")
65
  with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
66
  config_data = yaml.safe_load(f)
67
 
68
- # Update the model_dir for VLRecognition
69
- # Heuristic: Find and update VLRecognition model_dir
70
- if 'SubModules' in config_data and 'VLRecognition' in config_data['SubModules']:
71
- config_data['SubModules']['VLRecognition']['model_dir'] = CUSTOM_MODEL_PATH
72
- logger.info(f"Updated VLRecognition model_dir to {CUSTOM_MODEL_PATH}")
73
- else:
74
- logger.warning("Could not find VLRecognition in config_data['SubModules']. Attempting fallback.")
75
- # Fallback searching through the dict if structure is different
76
- def update_model_dir(d):
 
 
 
 
 
 
 
 
 
77
  for k, v in d.items():
78
  if k == 'VLRecognition' and isinstance(v, dict):
 
79
  v['model_dir'] = CUSTOM_MODEL_PATH
80
- return True
81
- if isinstance(v, dict):
82
- if update_model_dir(v): return True
83
- return False
84
- update_model_dir(config_data)
 
 
 
85
 
 
86
  with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
87
- yaml.dump(config_data, f)
88
 
89
- # 3. Initialize pipeline with modified config
90
- logger.info(f"Initializing PaddleOCRVL with config: {CONFIG_FILE}")
91
- pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
92
- logger.info("PaddleOCRVL initialized successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
  except Exception as e:
95
- logger.error(f"Failed to initialize PaddleOCRVL: {e}")
 
96
 
 
97
  if PADDLE_AVAILABLE:
98
  setup_pipeline()
 
 
99
 
100
  # --- Helper Functions ---
101
 
@@ -116,7 +174,7 @@ def image_to_base64_data_url(filepath: str) -> str:
116
 
117
  def _escape_inequalities_in_math(md: str) -> str:
118
  _MATH_PATTERNS = [
119
- re.compile(r"\$\$([\s\S]+?)\$\$"),
120
  re.compile(r"\$([^\$]+?)\$"),
121
  re.compile(r"\\\[([\s\S]+?)\\\]"),
122
  re.compile(r"\\\(([\s\S]+?)\\\)"),
@@ -141,8 +199,8 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
141
  src = image_to_base64_data_url(path_or_url)
142
 
143
  html_content = f"""
144
- <div class="uploaded-image">
145
- <img src="{src}" alt="Preview image" style="width:100%;height:100%;object-fit:contain;" loading="lazy"/>
146
  </div>
147
  """
148
  return gr.update(value=html_content, visible=True)
@@ -150,214 +208,218 @@ def update_preview_visibility(path_or_url: Optional[str]) -> Dict:
150
  # --- Inference Logic ---
151
 
152
  def run_inference(img_path, task_type="ocr"):
153
- if not PADDLE_AVAILABLE or pipeline is None:
154
- return "PaddleOCRVL is not available or failed to load. Ensure paddlex and paddleocr are installed.", "", "", ""
 
 
 
 
 
 
 
 
155
 
156
  if not img_path:
157
- return "Please upload an image.", "", "", ""
158
 
159
  try:
160
- logger.info(f"Running inference for {img_path} with task {task_type}")
 
161
 
162
- # Adjust pipeline parameters based on task_type if needed
163
- # PaddleOCRVL predict as per documentation
164
  output = pipeline.predict(img_path)
 
 
 
165
 
166
  md_content = ""
167
  json_content = ""
168
  vis_html = ""
169
 
170
- run_id = str(int(time.time()))
171
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
172
  os.makedirs(run_output_dir, exist_ok=True)
173
 
174
  for i, res in enumerate(output):
175
- # Save outputs
 
176
  res.save_to_json(save_path=run_output_dir)
177
  res.save_to_markdown(save_path=run_output_dir)
178
 
179
- # Print for logs
180
  res.print()
181
 
182
- # Extract content from generated files
183
- for root, dirs, files in os.walk(run_output_dir):
184
- for file in files:
185
- file_full_path = os.path.join(root, file)
186
- if file.endswith(".md"):
187
- with open(file_full_path, 'r', encoding='utf-8') as f:
188
- md_content += f.read() + "\n\n"
189
- elif file.endswith(".json"):
190
- with open(file_full_path, 'r', encoding='utf-8') as f:
191
- json_content += f.read() + "\n\n"
192
- elif file.endswith((".png", ".jpg", ".jpeg")) and "res" in file:
193
- # Found a visualization image
194
- vis_src = image_to_base64_data_url(file_full_path)
195
- vis_html += f'<div style="margin-bottom:20px;">'
196
- vis_html += f'<p style="font-weight:bold;">Visualization {i+1}:</p>'
197
- vis_html += f'<img src="{vis_src}" alt="Visualization {i+1}" style="width:100%; border-radius: 8px; border: 1px solid #ddd;">'
198
- vis_html += f'</div>'
 
199
 
200
  if not md_content:
201
- md_content = "No text recognized."
202
 
203
  md_preview = _escape_inequalities_in_math(md_content)
204
-
205
  return md_preview, md_content, vis_html, json_content
206
 
207
  except Exception as e:
208
- logger.error(f"Inference failed: {e}")
209
- return f"Error: {str(e)}", "", "", ""
 
 
210
 
211
  # --- UI Components ---
212
 
213
- css = """
214
- body, .gradio-container { font-family: 'Inter', -apple-system, system-ui, sans-serif; }
215
  .app-header {
216
  text-align: center;
217
- padding: 30px;
218
- background: linear-gradient(120deg, rgb(2, 132, 199) 0%, rgb(16, 185, 129) 60%, rgb(5, 150, 105) 100%);
219
  color: white;
220
- border-radius: 15px;
221
- margin-bottom: 25px;
222
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
223
  }
224
- .app-header h1 { color: white !important; margin: 0; font-size: 2.5em; }
225
- .app-header p { font-size: 1.2em; opacity: 0.9; margin-top: 10px; }
226
- .notice { margin: 8px auto 0; max-width: 900px; padding: 10px 12px; border: 1px solid #e5e7eb; border-radius: 8px; background: #f8fafc; font-size: 14px; line-height: 1.6; }
227
- .quick-links { text-align: center; padding: 8px 0; border: 1px solid #e5e7eb; border-radius: 8px; margin: 8px auto; max-width: 900px; }
228
- .quick-links a { margin: 0 12px; font-size: 14px; font-weight: 600; color: #3b82f6; text-decoration: none; }
229
- .quick-links a:hover { text-decoration: underline; }
230
- #image_preview_doc, #image_preview_vl, #image_preview_spot { height: 400px !important; overflow: auto; border: 1px solid #ddd; border-radius: 8px; background: #eee; }
231
- #image_preview_doc img, #image_preview_vl img, #image_preview_spot img { width: 100% !important; height: auto !important; object-fit: contain !important; display: block; }
232
- .output_markdown { min-height: 30rem !important; font-size: 1.1rem !important; line-height: 1.6 !important; }
233
- .prose pre { background: #f1f5f9 !important; border-radius: 8px !important; padding: 10px !important; }
234
  """
235
 
236
- with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
237
- # Header branding
 
 
 
238
  gr.HTML(
239
  """
240
  <div class="app-header">
241
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
242
- <p>Advancing Linguistic Rights for the 68 Indigenous Languages of Mexico</p>
243
  </div>
244
  """
245
  )
246
 
247
- gr.HTML(f"""
248
- <div class="notice">
249
- <strong>Powered by PaddleOCRVL 1.5:</strong> Optimized for in-the-wild document parsing and fine-tuned for indigenous languages.
250
- Initializing with custom weights: <code>{CUSTOM_MODEL_PATH}</code>
251
- </div>
252
- """)
253
 
254
- gr.HTML("""<div class="quick-links"><a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">GitHub</a> | <a href="https://huggingface.co/PaddlePaddle/PaddleOCR-VL-1.5" target="_blank">Base Model</a> | <a href="https://www.paddleocr.com" target="_blank">Documentation</a></div>""")
 
 
 
255
 
256
  with gr.Tabs():
257
  # --- Tab 1: Document Parsing ---
258
- with gr.Tab("πŸ“„ Document Parsing"):
259
  with gr.Row():
260
  with gr.Column(scale=5):
261
- file_doc = gr.File(label="Upload Document Image", file_count="single", type="filepath", file_types=["image"])
262
  preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
263
  with gr.Row(variant="panel"):
264
- with gr.Column(scale=2):
265
- btn_parse = gr.Button("πŸš€ Parse Document", variant="primary")
266
- with gr.Column(scale=3):
267
- with gr.Row():
268
- chart_switch = gr.Checkbox(label="Chart parsing", value=True)
269
- unwarp_switch = gr.Checkbox(label="Doc unwarping", value=False)
270
 
271
  with gr.Column(scale=7):
272
  with gr.Tabs():
273
- with gr.Tab("πŸ“ Markdown Preview"):
274
- md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output_markdown")
275
- with gr.Tab("πŸ–ΌοΈ Visualization"):
276
- vis_image_doc = gr.HTML("<p style='text-align:center; color:#888; padding: 20px;'>Parsing results will be visualized here.</p>")
277
  with gr.Tab("πŸ“œ Markdown Source"):
278
  md_raw_doc = gr.Code(language="markdown")
279
 
280
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
281
 
282
  def parse_doc_wrapper(fp, ch, uw):
283
- if not fp: return "Please upload an image.", "", "", ""
284
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="document")
285
- return res_preview, res_vis, res_raw
286
 
287
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
288
 
289
  # --- Tab 2: Element Recognition ---
290
- with gr.Tab("🧩 Element Recognition"):
291
  with gr.Row():
292
  with gr.Column(scale=5):
293
- file_vl = gr.File(label="Upload Element Image", file_count="single", type="filepath", file_types=["image"])
294
  preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
295
  with gr.Row():
296
- btn_ocr = gr.Button("Text Recognition", variant="secondary")
297
- btn_formula = gr.Button("Formula Recognition", variant="secondary")
298
  with gr.Row():
299
- btn_table = gr.Button("Table Recognition", variant="secondary")
300
- btn_chart = gr.Button("Chart Recognition", variant="secondary")
301
 
302
  with gr.Column(scale=7):
303
  with gr.Tabs():
304
  with gr.Tab("πŸ“Š Result"):
305
- md_preview_vl = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output_markdown")
306
- with gr.Tab("πŸ“œ Raw Output"):
307
  md_raw_vl = gr.Code(language="markdown")
308
 
309
  file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
310
 
311
  def run_vl_wrapper(fp, prompt):
312
- if not fp: return "Please upload an image.", "", ""
313
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type=prompt)
314
  return res_preview, res_raw
315
 
316
- for btn, prompt in [(btn_ocr, "Text Recognition"), (btn_formula, "Formula Recognition"), (btn_table, "Table Recognition"), (btn_chart, "Chart Recognition")]:
317
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
318
 
319
  # --- Tab 3: Spotting ---
320
- with gr.Tab("πŸ“ Spotting"):
321
  with gr.Row():
322
  with gr.Column(scale=5):
323
- file_spot = gr.File(label="Upload Image for Detection", file_count="single", type="filepath", file_types=["image"])
324
  preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
325
- btn_run_spot = gr.Button("Run Spotting", variant="primary")
326
 
327
  with gr.Column(scale=7):
328
  with gr.Tabs():
329
- with gr.Tab("πŸ–ΌοΈ Visualization"):
330
- vis_image_spot = gr.HTML("<p style='text-align:center; color:#888; padding: 20px;'>Detection visualization.</p>")
331
- with gr.Tab("πŸ’Ύ JSON Result"):
332
- json_spot = gr.Code(label="Detection Results", language="json")
333
 
334
  file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
335
 
336
  def run_spotting_wrapper(fp):
337
- if not fp: return "", ""
338
- res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="spotting")
339
- return res_vis, res_json
340
 
341
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
342
 
343
- # Footer Information
344
  gr.Markdown(
345
  """
346
  ---
347
- ### ℹ️ About Tachiwin 🦑
348
-
349
- **Tachiwin** (from Totonac - "Language") is dedicated to bridging the digital divide for indigenous languages of Mexico through AI technology. This model represents a **world first in tech access and linguistic rights**, specifically trained to recognize the 68 indigenous languages of Mexico.
350
-
351
- ### Supported Language Families
352
 
353
- **Uto-Aztecan:** NΓ‘huatl, Yaqui, Mayo, Huichol, TepehuΓ‘n, Tarahumara
354
- **Mayan:** Maya, Tzeltal, Tzotzil, Chol, Tojolabal, Q'anjob'al, Mam
355
- **Oto-Manguean:** Zapoteco, Mixteco, OtomΓ­, Mazateco, Chinanteco, Triqui
356
- **Totonac-Tepehua:** Totonaco, Tepehua
357
- **Mixe-Zoque:** Mixe, Zoque, Popoluca
358
- **Other:** PurΓ©pecha, Huave, Seri, Kickapoo, Kiliwa
359
 
360
- Made with ❀️ for linguistic diversity and indigenous rights 🦑
361
  """
362
  )
363
 
 
6
  import logging
7
  import sys
8
  import yaml
9
+ import traceback
10
  from typing import Dict, List, Tuple, Any, Optional
11
  import time
12
 
 
15
  import requests
16
  from urllib.parse import urlparse
17
 
 
 
 
 
 
 
 
 
 
18
  # --- Configuration ---
19
+ LOGGING_FORMAT = '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
20
+ logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT, handlers=[logging.StreamHandler(sys.stdout)])
21
  logger = logging.getLogger("TachiwinDocOCR")
22
 
23
  CUSTOM_MODEL_PATH = "tachiwin/Tachiwin-OCR-1.5"
 
33
  {"left": "\\[", "right": "\\]", "display": True},
34
  ]
35
 
36
+ # --- Paddle imports and Diagnostic ---
37
+ PADDLE_AVAILABLE = False
38
+ PADDLEX_VERSION = "Unknown"
39
+ PADDLEOCR_VERSION = "Unknown"
40
+
41
+ try:
42
+ import paddle
43
+ import paddlex
44
+ from paddlex import create_pipeline
45
+ from paddleocr import PaddleOCRVL
46
+ PADDLE_AVAILABLE = True
47
+ PADDLEX_VERSION = getattr(paddlex, "__version__", "Unknown")
48
+ logger.info(f"Paddle libraries loaded. PaddleX version: {PADDLEX_VERSION}")
49
+ except ImportError as e:
50
+ logger.error(f"Import Error: {e}")
51
+ logger.error(traceback.format_exc())
52
+ except Exception as e:
53
+ logger.error(f"Unexpected error during import: {e}")
54
+ logger.error(traceback.format_exc())
55
+
56
  # --- Model Initialization ---
57
  pipeline = None
58
 
59
  def setup_pipeline():
60
  global pipeline
61
  if not PADDLE_AVAILABLE:
62
+ logger.error("Skipping pipeline setup because Paddle is not available.")
63
  return
64
 
65
  try:
66
+ logger.info("Starting setup_pipeline...")
67
+
68
+ # 1. Generate default config via CLI-like method to avoid early model download
69
+ # We'll use create_pipeline and then export_pipeline_config, but we need to be careful
70
+ # as create_pipeline might download the model immediately.
71
+
72
+ # If the file exists, we'll read it. If not, we'll try to create a minimal one or use paddlex CLI.
73
  if not os.path.exists(CONFIG_FILE):
74
+ logger.info(f"Generating default configuration for PaddleOCR-VL...")
75
+ # Ideally: paddlex --get_pipeline_config PaddleOCR-VL
76
+ # We can try to get it from paddlex registry if documented
77
+ try:
78
+ from paddlex.inference.pipelines import pipeline_registry
79
+ # This is internal, but let's try to find if we can get the default dict
80
+ logger.info(f"Registered pipelines: {list(pipeline_registry.keys())[:5]}...")
81
+ except:
82
+ pass
83
+
84
+ # Fallback: Create a temporary pipeline to export config
85
+ logger.info("Initializing a temporary pipeline to export default configuration...")
86
  temp_pipeline = create_pipeline("PaddleOCR-VL")
87
  temp_pipeline.export_pipeline_config(save_path=CONFIG_FILE)
88
+ logger.info(f"Default configuration exported to {CONFIG_FILE}")
89
 
90
+ # 2. Load and Modify Config
91
+ logger.info(f"Loading configuration from {CONFIG_FILE}")
92
  with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
93
  config_data = yaml.safe_load(f)
94
 
95
+ logger.info("Modifying configuration with custom model path...")
96
+
97
+ # Rigorous path search and modification
98
+ modified = False
99
+
100
+ # Check standard PaddleX structure
101
+ if 'SubModules' in config_data:
102
+ for sub_name, sub_cfg in config_data['SubModules'].items():
103
+ if sub_name == 'VLRecognition':
104
+ old_path = sub_cfg.get('model_dir')
105
+ sub_cfg['model_dir'] = CUSTOM_MODEL_PATH
106
+ logger.info(f"Success: Updated SubModules.VLRecognition.model_dir from '{old_path}' to '{CUSTOM_MODEL_PATH}'")
107
+ modified = True
108
+
109
+ if not modified:
110
+ logger.warning("Standard SubModules.VLRecognition path not found. performing deep search...")
111
+ def deep_update(d):
112
+ count = 0
113
  for k, v in d.items():
114
  if k == 'VLRecognition' and isinstance(v, dict):
115
+ old = v.get('model_dir')
116
  v['model_dir'] = CUSTOM_MODEL_PATH
117
+ logger.info(f"Deep search found VLRecognition. Updated model_dir from '{old}' to '{CUSTOM_MODEL_PATH}'")
118
+ count += 1
119
+ elif isinstance(v, dict):
120
+ count += deep_update(v)
121
+ return count
122
+
123
+ if deep_update(config_data) > 0:
124
+ modified = True
125
 
126
+ # Save modified config
127
  with open(CONFIG_FILE, 'w', encoding='utf-8') as f:
128
+ yaml.dump(config_data, f, default_flow_style=False)
129
 
130
+ # 3. Log the final YAML to console as requested
131
+ logger.info("--- FINAL YAML CONFIGURATION ---")
132
+ yaml_str = yaml.dump(config_data, default_flow_style=False)
133
+ print(yaml_str)
134
+ logger.info("--- END FINAL YAML CONFIGURATION ---")
135
+
136
+ # 4. Initialize pipeline with modified config
137
+ logger.info(f"Initializing PaddleOCRVL with custom config file: {CONFIG_FILE}")
138
+ # Note: We use PaddleOCRVL(pipeline_config=CONFIG_FILE) as per our research
139
+ # If that fails, we can try create_pipeline(CONFIG_FILE)
140
+ try:
141
+ pipeline = PaddleOCRVL(pipeline_config=CONFIG_FILE)
142
+ logger.info("Success: PaddleOCRVL initialized with custom config.")
143
+ except Exception as e:
144
+ logger.warning(f"PaddleOCRVL(pipeline_config=...) failed: {e}. Trying create_pipeline(path_to_yaml)...")
145
+ pipeline = create_pipeline(CONFIG_FILE)
146
+ logger.info("Success: Pipeline initialized using create_pipeline(CONFIG_FILE).")
147
 
148
  except Exception as e:
149
+ logger.error(f"CRITICAL: Failed to setup pipeline: {e}")
150
+ logger.error(traceback.format_exc())
151
 
152
+ # Initial setup
153
  if PADDLE_AVAILABLE:
154
  setup_pipeline()
155
+ else:
156
+ logger.error("Inference backend disabled: Paddle libraries not found.")
157
 
158
  # --- Helper Functions ---
159
 
 
174
 
175
  def _escape_inequalities_in_math(md: str) -> str:
176
  _MATH_PATTERNS = [
177
+ re.compile(r"\$$([\s\S]+?)\$$"),
178
  re.compile(r"\$([^\$]+?)\$"),
179
  re.compile(r"\\\[([\s\S]+?)\\\]"),
180
  re.compile(r"\\\(([\s\S]+?)\\\)"),
 
199
  src = image_to_base64_data_url(path_or_url)
200
 
201
  html_content = f"""
202
+ <div class="uploaded-image" style="background: white; padding: 10px; border-radius: 8px;">
203
+ <img src="{src}" alt="Preview" style="width:100%; height:auto; max-height:800px; object-fit:contain;"/>
204
  </div>
205
  """
206
  return gr.update(value=html_content, visible=True)
 
208
  # --- Inference Logic ---
209
 
210
  def run_inference(img_path, task_type="ocr"):
211
+ status_msg = ""
212
+ if not PADDLE_AVAILABLE:
213
+ status_msg = "❌ Paddle libraries not installed."
214
+ logger.error(status_msg)
215
+ return status_msg, "", "", ""
216
+
217
+ if pipeline is None:
218
+ status_msg = "❌ Pipeline failed to initialize. Check logs for details."
219
+ logger.error(status_msg)
220
+ return status_msg, "", "", ""
221
 
222
  if not img_path:
223
+ return "⚠️ Please upload an image first.", "", "", ""
224
 
225
  try:
226
+ logger.info(f"--- Inference Start: {task_type} ---")
227
+ logger.info(f"Image: {img_path}")
228
 
229
+ start_time = time.time()
 
230
  output = pipeline.predict(img_path)
231
+ end_time = time.time()
232
+
233
+ logger.info(f"Inference completed in {end_time - start_time:.2f} seconds.")
234
 
235
  md_content = ""
236
  json_content = ""
237
  vis_html = ""
238
 
239
+ run_id = f"run_{int(time.time())}"
240
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
241
  os.makedirs(run_output_dir, exist_ok=True)
242
 
243
  for i, res in enumerate(output):
244
+ logger.info(f"Processing output segment {i+1}...")
245
+ # Save results
246
  res.save_to_json(save_path=run_output_dir)
247
  res.save_to_markdown(save_path=run_output_dir)
248
 
249
+ # Print to stdout
250
  res.print()
251
 
252
+ # Read files back for Gradio
253
+ files_found = os.listdir(run_output_dir)
254
+ logger.info(f"Generated files: {files_found}")
255
+
256
+ for file in files_found:
257
+ fpath = os.path.join(run_output_dir, file)
258
+ if file.endswith(".md"):
259
+ with open(fpath, 'r', encoding='utf-8') as f:
260
+ md_content += f.read() + "\n\n"
261
+ elif file.endswith(".json"):
262
+ with open(fpath, 'r', encoding='utf-8') as f:
263
+ json_content += f.read() + "\n\n"
264
+ elif file.endswith((".png", ".jpg", ".jpeg")) and ("res" in file or "vis" in file):
265
+ vis_src = image_to_base64_data_url(fpath)
266
+ vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background: white;">'
267
+ vis_html += f'<div style="background: #10b981; color: white; padding: 5px 15px; font-weight: bold;">Visualization {i+1}</div>'
268
+ vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
269
+ vis_html += f'</div>'
270
 
271
  if not md_content:
272
+ md_content = "⚠️ OCR finished but no text was extracted."
273
 
274
  md_preview = _escape_inequalities_in_math(md_content)
275
+ logger.info("--- Inference Finished Successfully ---")
276
  return md_preview, md_content, vis_html, json_content
277
 
278
  except Exception as e:
279
+ err_detail = traceback.format_exc()
280
+ logger.error(f"Inference Error: {e}")
281
+ logger.error(err_detail)
282
+ return f"❌ Error: {str(e)}\n\nCheck logs for more details.", "", "", ""
283
 
284
  # --- UI Components ---
285
 
286
+ custom_css = """
287
+ body, .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
288
  .app-header {
289
  text-align: center;
290
+ padding: 2.5rem;
291
+ background: linear-gradient(135deg, #0284c7 0%, #10b981 100%);
292
  color: white;
293
+ border-radius: 1.5rem;
294
+ margin-bottom: 2rem;
295
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1);
296
  }
297
+ .app-header h1 { color: white !important; font-weight: 800; font-size: 2.5rem; margin-bottom: 0.5rem; }
298
+ .app-header p { font-size: 1.25rem; opacity: 0.95; }
299
+ .notice { background: #f0fdf4; border: 1px solid #bbf7d0; color: #166534; padding: 1rem; border-radius: 1rem; margin-bottom: 2rem; }
300
+ .quick-links { display: flex; justify-content: center; gap: 1.5rem; margin-bottom: 2rem; font-weight: 600; }
301
+ .quick-links a { color: #0284c7; text-decoration: none; transition: color 0.2s; }
302
+ .quick-links a:hover { color: #0369a1; text-decoration: underline; }
303
+ .output-box { border-radius: 1rem !important; border: 1px solid #e2e8f0 !important; }
304
+ .status-indicator { font-family: monospace; font-size: 0.875rem; color: #64748b; margin-top: 0.5rem; }
 
 
305
  """
306
 
307
+ with gr.Blocks(theme=gr.themes.Ocean(), css=custom_css) as demo:
308
+ # Diagnostic Info
309
+ gr.HTML(f"""<div style="display:none">Paddle Status: {PADDLE_AVAILABLE}, X: {PADDLEX_VERSION}</div>""")
310
+
311
+ # Branding Header
312
  gr.HTML(
313
  """
314
  <div class="app-header">
315
  <h1>🌎 Tachiwin Document Parsing OCR 🦑</h1>
316
+ <p>Empowering the Indigenous Languages of Mexico through State-of-the-Art OCR</p>
317
  </div>
318
  """
319
  )
320
 
321
+ with gr.Row(elem_classes=["notice"]):
322
+ gr.Markdown(f"""
323
+ **πŸš€ Engine Status:** Using **PaddleOCRVL 1.5** with custom weights: `{CUSTOM_MODEL_PATH}`.
324
+ Supported Languages: 68 Official Mexican Indigenous Languages.
325
+ """)
 
326
 
327
+ with gr.Row(elem_classes=["quick-links"]):
328
+ gr.HTML('<a href="https://github.com/ljcamargo/tachiwin_paddleocrvl_finetuning" target="_blank">πŸ’» GitHub</a>')
329
+ gr.HTML('<a href="https://huggingface.co/tachiwin/PaddleOCR-VL-Tachiwin-BF16" target="_blank">πŸ€— Model Repo</a>')
330
+ gr.HTML('<a href="https://www.paddleocr.com" target="_blank">πŸ“š Documentation</a>')
331
 
332
  with gr.Tabs():
333
  # --- Tab 1: Document Parsing ---
334
+ with gr.Tab("πŸ“„ Full Document Parsing"):
335
  with gr.Row():
336
  with gr.Column(scale=5):
337
+ file_doc = gr.File(label="Upload Image", file_count="single", type="filepath", file_types=["image"])
338
  preview_doc_html = gr.HTML(value="", elem_id="image_preview_doc", visible=False)
339
  with gr.Row(variant="panel"):
340
+ btn_parse = gr.Button("πŸ” Start Parsing", variant="primary", scale=2)
341
+ with gr.Column(scale=1):
342
+ chart_switch = gr.Checkbox(label="Chart OCR", value=True)
343
+ unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
 
344
 
345
  with gr.Column(scale=7):
346
  with gr.Tabs():
347
+ with gr.Tab("πŸ“ Markdown View"):
348
+ md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
349
+ with gr.Tab("πŸ–ΌοΈ Visual Results"):
350
+ vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Upload and parse to see visual results.</div>')
351
  with gr.Tab("πŸ“œ Markdown Source"):
352
  md_raw_doc = gr.Code(language="markdown")
353
 
354
  file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
355
 
356
  def parse_doc_wrapper(fp, ch, uw):
357
+ return run_inference(fp, task_type="Document Parsing")[:3] # Returns Preview, Vis, Raw
 
 
358
 
359
  btn_parse.click(parse_doc_wrapper, [file_doc, chart_switch, unwarp_switch], [md_preview_doc, vis_image_doc, md_raw_doc])
360
 
361
  # --- Tab 2: Element Recognition ---
362
+ with gr.Tab("🧩 Specific Recognition"):
363
  with gr.Row():
364
  with gr.Column(scale=5):
365
+ file_vl = gr.File(label="Upload Element", file_count="single", type="filepath", file_types=["image"])
366
  preview_vl_html = gr.HTML(value="", elem_id="image_preview_vl", visible=False)
367
  with gr.Row():
368
+ btn_ocr = gr.Button("Text OCR", variant="secondary")
369
+ btn_formula = gr.Button("Math Formula", variant="secondary")
370
  with gr.Row():
371
+ btn_table = gr.Button("Table Data", variant="secondary")
372
+ btn_chart = gr.Button("Chart Data", variant="secondary")
373
 
374
  with gr.Column(scale=7):
375
  with gr.Tabs():
376
  with gr.Tab("πŸ“Š Result"):
377
+ md_preview_vl = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
378
+ with gr.Tab("πŸ“œ Source"):
379
  md_raw_vl = gr.Code(language="markdown")
380
 
381
  file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
382
 
383
  def run_vl_wrapper(fp, prompt):
384
+ res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
 
385
  return res_preview, res_raw
386
 
387
+ for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table"), (btn_chart, "Chart")]:
388
  btn.click(run_vl_wrapper, [file_vl, gr.State(prompt)], [md_preview_vl, md_raw_vl])
389
 
390
  # --- Tab 3: Spotting ---
391
+ with gr.Tab("πŸ“ Feature Spotting"):
392
  with gr.Row():
393
  with gr.Column(scale=5):
394
+ file_spot = gr.File(label="Target Image", file_count="single", type="filepath", file_types=["image"])
395
  preview_spot_html = gr.HTML(value="", elem_id="image_preview_spot", visible=False)
396
+ btn_run_spot = gr.Button("🎯 Run Spotting", variant="primary")
397
 
398
  with gr.Column(scale=7):
399
  with gr.Tabs():
400
+ with gr.Tab("πŸ–ΌοΈ Detection"):
401
+ vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes will appear here.</div>')
402
+ with gr.Tab("πŸ’Ύ JSON Feed"):
403
+ json_spot = gr.Code(label="JSON", language="json")
404
 
405
  file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
406
 
407
  def run_spotting_wrapper(fp):
408
+ _, _, vis, js = run_inference(fp, task_type="Spotting")
409
+ return vis, js
 
410
 
411
  btn_run_spot.click(run_spotting_wrapper, file_spot, [vis_image_spot, json_spot])
412
 
413
+ # Footer
414
  gr.Markdown(
415
  """
416
  ---
417
+ ### 🌎 Tachiwin Project 🦑
418
+ Dedicated to bridging the digital divide for the 68 officially recognized indigenous languages of Mexico.
 
 
 
419
 
420
+ **Supported Families:** Uto-Aztecan, Mayan, Oto-Manguean, Totonac-Tepehua, Mixe-Zoque, and more.
 
 
 
 
 
421
 
422
+ *Linguistic rights are human rights.*
423
  """
424
  )
425