Luis J Camargo committed on
Commit
9bd7a9e
·
1 Parent(s): 610efd0

progress report fix

Browse files
Files changed (1) hide show
  1. app.py +22 -26
app.py CHANGED
@@ -186,9 +186,8 @@ def run_inference(img_path, task_type="ocr", progress=gr.Progress()):
186
 
187
  try:
188
  logger.info(f"--- Inference Start: {task_type} ---")
189
- progress(0, desc="πŸ“¦ Initializing inference engine...")
190
  output = pipeline.predict(input=img_path)
191
- logger.info(f"Output object type: {type(output)}")
192
 
193
  md_content = ""
194
  json_content = ""
@@ -198,60 +197,57 @@ def run_inference(img_path, task_type="ocr", progress=gr.Progress()):
198
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
199
  os.makedirs(run_output_dir, exist_ok=True)
200
 
201
- logger.info(f"Inference generator ready. Starting iteration...")
202
- progress(0.1, desc="πŸ” Document preprocessing...")
203
 
204
  for i, res in enumerate(output):
205
  logger.info(f"Processing segment {i+1}...")
206
- # Use dynamic progress increment
207
- p_val = min(0.1 + (i + 1) * 0.15, 0.95)
208
- progress(p_val, desc=f"✍️ Recognizing content (segment {i+1})...")
209
 
210
- # Save results to unique dir
211
- seg_dir = os.path.join(run_output_dir, f"seg_{i}")
212
- os.makedirs(seg_dir, exist_ok=True)
213
- res.save_to_json(save_path=seg_dir)
214
- res.save_to_markdown(save_path=seg_dir)
215
  res.print()
216
 
217
- # Gather files specifically from this segment
218
- fnames = os.listdir(seg_dir)
219
  for fname in fnames:
220
- fpath = os.path.join(seg_dir, fname)
221
  if fname.endswith(".md"):
222
  with open(fpath, 'r', encoding='utf-8') as f:
223
  content = f.read()
224
- md_content += content + "\n\n"
 
225
  elif fname.endswith(".json"):
226
  with open(fpath, 'r', encoding='utf-8') as f:
227
  content = f.read()
228
- json_content += content + "\n\n"
 
229
  elif fname.endswith((".png", ".jpg", ".jpeg")) and ("res" in fname or "vis" in fname):
230
  vis_src = image_to_base64_data_url(fpath)
231
  vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
232
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
233
  vis_html += f'</div>'
234
 
235
- # Yield partial results to keep UI alive
236
- partial_md = _escape_inequalities_in_math(md_content)
237
- yield partial_md, md_content, vis_html, json_content
238
  logger.info(f"Finished processing segment {i+1}")
 
 
239
 
240
  if not md_content:
241
  md_content = "⚠️ Finished but no content was recognized."
242
  yield md_content, md_content, "", ""
243
- else:
244
- final_md = _escape_inequalities_in_math(md_content)
245
- progress(1.0, desc="βœ… Complete")
246
- yield final_md, md_content, vis_html, json_content
247
 
248
  logger.info("--- Inference Finished Successfully ---")
 
249
 
250
  except Exception as e:
251
  logger.error(f"❌ Inference Error: {e}")
252
  logger.error(traceback.format_exc())
253
  yield f"❌ Error: {str(e)}", "", "", ""
254
- return
 
 
 
 
255
 
256
  # --- UI Components ---
257
 
@@ -291,7 +287,7 @@ with gr.Blocks() as demo:
291
  with gr.Row():
292
  with gr.Column(scale=5):
293
  file_doc = gr.Image(label="Upload Image", type="filepath")
294
- btn_parse = gr.Button("πŸ” Start Parsing", variant="primary")
295
  with gr.Row():
296
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
297
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
 
186
 
187
  try:
188
  logger.info(f"--- Inference Start: {task_type} ---")
189
+ progress(0, desc="Initializing...")
190
  output = pipeline.predict(input=img_path)
 
191
 
192
  md_content = ""
193
  json_content = ""
 
197
  run_output_dir = os.path.join(OUTPUT_DIR, run_id)
198
  os.makedirs(run_output_dir, exist_ok=True)
199
 
200
+ logger.info(f"will iterate")
 
201
 
202
  for i, res in enumerate(output):
203
  logger.info(f"Processing segment {i+1}...")
204
+ progress(None, desc=f"Processing segment {i+1}...")
 
 
205
 
206
+ # Save results
207
+ res.save_to_json(save_path=run_output_dir)
208
+ res.save_to_markdown(save_path=run_output_dir)
 
 
209
  res.print()
210
 
211
+ # Read back generated files
212
+ fnames = os.listdir(run_output_dir)
213
  for fname in fnames:
214
+ fpath = os.path.join(run_output_dir, fname)
215
  if fname.endswith(".md"):
216
  with open(fpath, 'r', encoding='utf-8') as f:
217
  content = f.read()
218
+ if content not in md_content:
219
+ md_content += content + "\n\n"
220
  elif fname.endswith(".json"):
221
  with open(fpath, 'r', encoding='utf-8') as f:
222
  content = f.read()
223
+ if content not in json_content:
224
+ json_content += content + "\n\n"
225
  elif fname.endswith((".png", ".jpg", ".jpeg")) and ("res" in fname or "vis" in fname):
226
  vis_src = image_to_base64_data_url(fpath)
227
  vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
228
  vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
229
  vis_html += f'</div>'
230
 
 
 
 
231
  logger.info(f"Finished processing segment {i+1}")
232
+ md_preview = _escape_inequalities_in_math(md_content)
233
+ yield md_preview, md_content, vis_html, json_content
234
 
235
  if not md_content:
236
  md_content = "⚠️ Finished but no content was recognized."
237
  yield md_content, md_content, "", ""
 
 
 
 
238
 
239
  logger.info("--- Inference Finished Successfully ---")
240
+ progress(1.0, desc="βœ… Complete")
241
 
242
  except Exception as e:
243
  logger.error(f"❌ Inference Error: {e}")
244
  logger.error(traceback.format_exc())
245
  yield f"❌ Error: {str(e)}", "", "", ""
246
+
247
+ except Exception as e:
248
+ logger.error(f"❌ Inference Error: {e}")
249
+ logger.error(traceback.format_exc())
250
+ return f"❌ Error: {str(e)}", "", "", ""
251
 
252
  # --- UI Components ---
253
 
 
287
  with gr.Row():
288
  with gr.Column(scale=5):
289
  file_doc = gr.Image(label="Upload Image", type="filepath")
290
+ btn_parse = gr.Button("οΏ½ Start Parsing", variant="primary")
291
  with gr.Row():
292
  chart_switch = gr.Checkbox(label="Chart OCR", value=True)
293
  unwarp_switch = gr.Checkbox(label="Unwarping", value=False)