Spaces:

fabioantonini
/

grapholab

Running

Fabio Antonini Claude Sonnet 4.6 committed on Apr 5

Commit

e479746

1 Parent(s): ad2e396

fix: isolate agent temp files per session + fix PaddlePaddle PIR on Windows

- Use a unique UUID subfolder (/tmp/gl/<session_id>/) per agent_stream call
so concurrent sessions never share/overwrite temp files
- Clean up the session temp dir in a finally block after streaming ends
- Disable PaddlePaddle PIR (FLAGS_enable_pir_api=0) and MKL-DNN at import
time in document_layout.py to prevent ConvertPirAttribute2RuntimeAttribute
errors on Windows with PaddlePaddle 3.x
- Auto-fallback from LayoutDetection to PPStructure if new API fails at
runtime (not just at import time)
- Improve analisi_layout error message to avoid LLM hallucinating wrong causes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (2) hide show

core/agent.py +16 -5
core/document_layout.py +52 -16

core/agent.py CHANGED Viewed

@@ -274,7 +274,12 @@ def analisi_layout(image_path: str) -> str:
         from core.document_layout import detect_layout
         result = detect_layout(image_path)
         if "error" in result:
-            return f"Errore layout detection: {result['error']}"
         regions = result.get("regions", [])
         if not regions:
             return "Nessuna regione strutturata rilevata nel documento."
@@ -494,10 +499,12 @@ def agent_stream(
         model = get_active_model()
     from langchain_core.messages import HumanMessage, AIMessage
-    # Copy uploaded files to a short, predictable temp path so the LLM
-    # does not mangle long Windows AppData paths when constructing tool args.
-    _gl_tmp = Path(tempfile.gettempdir()) / "gl"
-    _gl_tmp.mkdir(exist_ok=True)
     short_paths: list[str] = []
     for i, p in enumerate(file_paths):
         ext = Path(p).suffix or ".png"
@@ -584,3 +591,7 @@ def agent_stream(
     except Exception as e:
         yield f"{accumulated}\n\n❌ Errore dell'agente: {e}"

         from core.document_layout import detect_layout
         result = detect_layout(image_path)
         if "error" in result:
+            err = result['error']
+            # Return a plain technical error; do NOT speculate on the cause
+            return (
+                f"Errore tecnico nel modello di layout detection: {err}\n"
+                "Nota: questo è un errore del backend PaddlePaddle, non dipende dal tipo di file."
+            )
         regions = result.get("regions", [])
         if not regions:
             return "Nessuna regione strutturata rilevata nel documento."
         model = get_active_model()
     from langchain_core.messages import HumanMessage, AIMessage
+    # Copy uploaded files into a unique per-session subfolder so concurrent
+    # requests never overwrite each other's files (e.g. /tmp/gl/<uuid>/f0.jpg).
+    import uuid as _uuid
+    _session_id = _uuid.uuid4().hex[:12]
+    _gl_tmp = Path(tempfile.gettempdir()) / "gl" / _session_id
+    _gl_tmp.mkdir(parents=True, exist_ok=True)
     short_paths: list[str] = []
     for i, p in enumerate(file_paths):
         ext = Path(p).suffix or ".png"
     except Exception as e:
         yield f"{accumulated}\n\n❌ Errore dell'agente: {e}"
+    finally:
+        # Clean up the per-session temp directory after streaming completes
+        if _gl_tmp.exists():
+            shutil.rmtree(_gl_tmp, ignore_errors=True)

core/document_layout.py CHANGED Viewed

@@ -14,6 +14,7 @@ from __future__ import annotations
 import base64
 import io
 import json
 import threading
 from pathlib import Path
 from typing import Any
@@ -22,6 +23,12 @@ import numpy as np
 import requests
 from PIL import Image
 # ──────────────────────────────────────────────────────────────────────────────
 # Lazy model state
 # ──────────────────────────────────────────────────────────────────────────────
@@ -35,29 +42,40 @@ def _get_layout():
     """Lazy-load layout detection engine.
     Tries the new PaddleOCR 2.8+ LayoutDetection API first;
-    falls back to the stable PPStructure API if not available.
     """
     global _layout_engine
     if _layout_engine is None:
         with _lock:
             if _layout_engine is None:
-                try:
-                    # PaddleOCR 2.8+ / PaddleX 3.x API
-                    from paddleocr import LayoutDetection  # type: ignore
-                    _layout_engine = ("new", LayoutDetection())
-                except (ImportError, AttributeError):
-                    # Fallback: stable PPStructure API (all PaddleOCR versions)
-                    from paddleocr import PPStructure  # type: ignore
-                    engine = PPStructure(
-                        table=False,
-                        ocr=False,
-                        show_log=False,
-                        layout=True,
-                    )
-                    _layout_engine = ("old", engine)
     return _layout_engine
 def _get_ocr():
     """Lazy-load PaddleOCR text recognition engine."""
     global _ocr_engine
@@ -149,6 +167,7 @@ def detect_layout(image_path: str) -> dict:
           - bbox: list  [x1, y1, x2, y2] pixel coordinates
           - score: float confidence score
     """
     api_version, layout = _get_layout()
     try:
         if api_version == "new":
@@ -162,7 +181,24 @@ def detect_layout(image_path: str) -> dict:
             raw = layout(img)
             return _parse_old_api(raw)
     except Exception as e:
-        return {"regions": [], "error": str(e)}
 def extract_ordered_text(image_path: str) -> str:

 import base64
 import io
 import json
+import os
 import threading
 from pathlib import Path
 from typing import Any
 import requests
 from PIL import Image
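+# Disable PaddlePaddle PIR (Program IR), introduced in Paddle 3.x: it triggers
+# "ConvertPirAttribute2RuntimeAttribute not supported" errors for layout models
+# on Windows. MKL-DNN is switched off as well. Both flags are set at module
+# import time, ahead of this module's lazy paddleocr imports; setdefault()
+# leaves any value already present in the environment untouched.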
+os.environ.setdefault("FLAGS_enable_pir_api", "0")
+os.environ.setdefault("FLAGS_use_mkldnn", "0")
 # ──────────────────────────────────────────────────────────────────────────────
 # Lazy model state
 # ──────────────────────────────────────────────────────────────────────────────
     """Lazy-load layout detection engine.
     Tries the new PaddleOCR 2.8+ LayoutDetection API first;
+    falls back to the stable PPStructure API if not available or broken.
     """
     global _layout_engine
     if _layout_engine is None:
         with _lock:
             if _layout_engine is None:
+                _layout_engine = _load_layout_engine()
     return _layout_engine
+def _load_layout_engine():
+    """Try new API first, fall back to stable PPStructure."""
+    # Returns a 2-tuple (tag, engine): ("new", LayoutDetection instance) or
+    # ("old", PPStructure instance). Callers branch on the tag to decide how
+    # to invoke the engine and parse its output.
+    try:
+        # PaddleOCR 2.8+ / PaddleX 3.x API
+        from paddleocr import LayoutDetection  # type: ignore
+        engine = LayoutDetection()
+        # No inference is run here: only the import and the constructor are
+        # guarded. Any exception they raise (ImportError, a PIR error during
+        # initialisation, ...) is caught below and we fall through to
+        # PPStructure. Failures that only show up when the engine is actually
+        # called are NOT detected at this point.
+        return ("new", engine)
+    except Exception:
+        # Deliberately swallowed: any failure of the new API means "use the
+        # fallback". NOTE(review): the original error is not logged, which can
+        # hide why the new API was rejected.
+        pass
+    # Fallback: stable PPStructure API (all PaddleOCR versions)
+    from paddleocr import PPStructure  # type: ignore
+    # Layout-only configuration: table and OCR stages off, GPU off, MKL-DNN off.
+    engine = PPStructure(
+        table=False,
+        ocr=False,
+        show_log=False,
+        layout=True,
+        use_gpu=False,
+        enable_mkldnn=False,
+    )
+    return ("old", engine)
 def _get_ocr():
     """Lazy-load PaddleOCR text recognition engine."""
     global _ocr_engine
           - bbox: list  [x1, y1, x2, y2] pixel coordinates
           - score: float confidence score
     """
+    global _layout_engine
     api_version, layout = _get_layout()
     try:
         if api_version == "new":
             raw = layout(img)
             return _parse_old_api(raw)
     except Exception as e:
+        err_str = str(e)
+        # If the new API fails at runtime (PIR / backend errors), force-reset to
+        # PPStructure and retry once.
+        if api_version == "new" and _layout_engine is not None:
+            with _lock:
+                _layout_engine = None  # will re-init as old API next call
+            try:
+                _layout_engine = _load_layout_engine()  # reload as PPStructure
+                api_version2, layout2 = _layout_engine
+                import cv2  # type: ignore
+                img = cv2.imread(image_path)
+                if img is None:
+                    return {"regions": [], "error": f"Cannot read image: {image_path}"}
+                raw2 = layout2(img)
+                return _parse_old_api(raw2)
+            except Exception as e2:
+                return {"regions": [], "error": str(e2)}
+        return {"regions": [], "error": err_str}
 def extract_ordered_text(image_path: str) -> str: