Fabio Antonini Claude Sonnet 4.6 committed on
Commit
e479746
·
1 Parent(s): ad2e396

fix: isolate agent temp files per session + fix PaddlePaddle PIR on Windows

Browse files

- Use a unique UUID subfolder (/tmp/gl/<session_id>/) per agent_stream call
so concurrent sessions never share/overwrite temp files
- Clean up the session temp dir in a finally block after streaming ends
- Disable PaddlePaddle PIR (FLAGS_enable_pir_api=0) and MKL-DNN at import
time in document_layout.py to prevent ConvertPirAttribute2RuntimeAttribute
errors on Windows with PaddlePaddle 3.x
- Auto-fallback from LayoutDetection to PPStructure if new API fails at
runtime (not just at import time)
- Improve analisi_layout error message to avoid LLM hallucinating wrong causes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. core/agent.py +16 -5
  2. core/document_layout.py +52 -16
core/agent.py CHANGED
@@ -274,7 +274,12 @@ def analisi_layout(image_path: str) -> str:
274
  from core.document_layout import detect_layout
275
  result = detect_layout(image_path)
276
  if "error" in result:
277
- return f"Errore layout detection: {result['error']}"
 
 
 
 
 
278
  regions = result.get("regions", [])
279
  if not regions:
280
  return "Nessuna regione strutturata rilevata nel documento."
@@ -494,10 +499,12 @@ def agent_stream(
494
  model = get_active_model()
495
  from langchain_core.messages import HumanMessage, AIMessage
496
 
497
- # Copy uploaded files to a short, predictable temp path so the LLM
498
- # does not mangle long Windows AppData paths when constructing tool args.
499
- _gl_tmp = Path(tempfile.gettempdir()) / "gl"
500
- _gl_tmp.mkdir(exist_ok=True)
 
 
501
  short_paths: list[str] = []
502
  for i, p in enumerate(file_paths):
503
  ext = Path(p).suffix or ".png"
@@ -584,3 +591,7 @@ def agent_stream(
584
 
585
  except Exception as e:
586
  yield f"{accumulated}\n\n❌ Errore dell'agente: {e}"
 
 
 
 
 
274
  from core.document_layout import detect_layout
275
  result = detect_layout(image_path)
276
  if "error" in result:
277
+ err = result['error']
278
+ # Return a plain technical error; do NOT speculate on the cause
279
+ return (
280
+ f"Errore tecnico nel modello di layout detection: {err}\n"
281
+ "Nota: questo è un errore del backend PaddlePaddle, non dipende dal tipo di file."
282
+ )
283
  regions = result.get("regions", [])
284
  if not regions:
285
  return "Nessuna regione strutturata rilevata nel documento."
 
499
  model = get_active_model()
500
  from langchain_core.messages import HumanMessage, AIMessage
501
 
502
+ # Copy uploaded files into a unique per-session subfolder so concurrent
503
+ # requests never overwrite each other's files (e.g. /tmp/gl/<uuid>/f0.jpg).
504
+ import uuid as _uuid
505
+ _session_id = _uuid.uuid4().hex[:12]
506
+ _gl_tmp = Path(tempfile.gettempdir()) / "gl" / _session_id
507
+ _gl_tmp.mkdir(parents=True, exist_ok=True)
508
  short_paths: list[str] = []
509
  for i, p in enumerate(file_paths):
510
  ext = Path(p).suffix or ".png"
 
591
 
592
  except Exception as e:
593
  yield f"{accumulated}\n\n❌ Errore dell'agente: {e}"
594
+ finally:
595
+ # Clean up the per-session temp directory after streaming completes
596
+ if _gl_tmp.exists():
597
+ shutil.rmtree(_gl_tmp, ignore_errors=True)
core/document_layout.py CHANGED
@@ -14,6 +14,7 @@ from __future__ import annotations
14
  import base64
15
  import io
16
  import json
 
17
  import threading
18
  from pathlib import Path
19
  from typing import Any
@@ -22,6 +23,12 @@ import numpy as np
22
  import requests
23
  from PIL import Image
24
 
 
 
 
 
 
 
25
  # ──────────────────────────────────────────────────────────────────────────────
26
  # Lazy model state
27
  # ──────────────────────────────────────────────────────────────────────────────
@@ -35,29 +42,40 @@ def _get_layout():
35
  """Lazy-load layout detection engine.
36
 
37
  Tries the new PaddleOCR 2.8+ LayoutDetection API first;
38
- falls back to the stable PPStructure API if not available.
39
  """
40
  global _layout_engine
41
  if _layout_engine is None:
42
  with _lock:
43
  if _layout_engine is None:
44
- try:
45
- # PaddleOCR 2.8+ / PaddleX 3.x API
46
- from paddleocr import LayoutDetection # type: ignore
47
- _layout_engine = ("new", LayoutDetection())
48
- except (ImportError, AttributeError):
49
- # Fallback: stable PPStructure API (all PaddleOCR versions)
50
- from paddleocr import PPStructure # type: ignore
51
- engine = PPStructure(
52
- table=False,
53
- ocr=False,
54
- show_log=False,
55
- layout=True,
56
- )
57
- _layout_engine = ("old", engine)
58
  return _layout_engine
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def _get_ocr():
62
  """Lazy-load PaddleOCR text recognition engine."""
63
  global _ocr_engine
@@ -149,6 +167,7 @@ def detect_layout(image_path: str) -> dict:
149
  - bbox: list [x1, y1, x2, y2] pixel coordinates
150
  - score: float confidence score
151
  """
 
152
  api_version, layout = _get_layout()
153
  try:
154
  if api_version == "new":
@@ -162,7 +181,24 @@ def detect_layout(image_path: str) -> dict:
162
  raw = layout(img)
163
  return _parse_old_api(raw)
164
  except Exception as e:
165
- return {"regions": [], "error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
 
168
  def extract_ordered_text(image_path: str) -> str:
 
14
  import base64
15
  import io
16
  import json
17
+ import os
18
  import threading
19
  from pathlib import Path
20
  from typing import Any
 
23
  import requests
24
  from PIL import Image
25
 
26
+ # Disable PaddlePaddle PIR (Program IR) introduced in Paddle 3.x — it triggers
27
+ # "ConvertPirAttribute2RuntimeAttribute not supported" errors for layout models
28
+ # on Windows. Setting this before any paddle import ensures CPU-only stable path.
29
+ os.environ.setdefault("FLAGS_enable_pir_api", "0")
30
+ os.environ.setdefault("FLAGS_use_mkldnn", "0")
31
+
32
  # ──────────────────────────────────────────────────────────────────────────────
33
  # Lazy model state
34
  # ──────────────────────────────────────────────────────────────────────────────
 
42
  """Lazy-load layout detection engine.
43
 
44
  Tries the new PaddleOCR 2.8+ LayoutDetection API first;
45
+ falls back to the stable PPStructure API if not available or broken.
46
  """
47
  global _layout_engine
48
  if _layout_engine is None:
49
  with _lock:
50
  if _layout_engine is None:
51
+ _layout_engine = _load_layout_engine()
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return _layout_engine
53
 
54
 
55
+ def _load_layout_engine():
56
+ """Try new API first, fall back to stable PPStructure."""
57
+ try:
58
+ # PaddleOCR 2.8+ / PaddleX 3.x API
59
+ from paddleocr import LayoutDetection # type: ignore
60
+ engine = LayoutDetection()
61
+ # Quick smoke-test: if initialisation triggers a PIR error immediately,
62
+ # we catch it here and fall through to PPStructure.
63
+ return ("new", engine)
64
+ except Exception:
65
+ pass
66
+ # Fallback: stable PPStructure API (all PaddleOCR versions)
67
+ from paddleocr import PPStructure # type: ignore
68
+ engine = PPStructure(
69
+ table=False,
70
+ ocr=False,
71
+ show_log=False,
72
+ layout=True,
73
+ use_gpu=False,
74
+ enable_mkldnn=False,
75
+ )
76
+ return ("old", engine)
77
+
78
+
79
  def _get_ocr():
80
  """Lazy-load PaddleOCR text recognition engine."""
81
  global _ocr_engine
 
167
  - bbox: list [x1, y1, x2, y2] pixel coordinates
168
  - score: float confidence score
169
  """
170
+ global _layout_engine
171
  api_version, layout = _get_layout()
172
  try:
173
  if api_version == "new":
 
181
  raw = layout(img)
182
  return _parse_old_api(raw)
183
  except Exception as e:
184
+ err_str = str(e)
185
+ # If the new API fails at runtime (PIR / backend errors), force-reset to
186
+ # PPStructure and retry once.
187
+ if api_version == "new" and _layout_engine is not None:
188
+ with _lock:
189
+ _layout_engine = None # will re-init as old API next call
190
+ try:
191
+ _layout_engine = _load_layout_engine() # reload as PPStructure
192
+ api_version2, layout2 = _layout_engine
193
+ import cv2 # type: ignore
194
+ img = cv2.imread(image_path)
195
+ if img is None:
196
+ return {"regions": [], "error": f"Cannot read image: {image_path}"}
197
+ raw2 = layout2(img)
198
+ return _parse_old_api(raw2)
199
+ except Exception as e2:
200
+ return {"regions": [], "error": str(e2)}
201
+ return {"regions": [], "error": err_str}
202
 
203
 
204
  def extract_ordered_text(image_path: str) -> str: