Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Claude committed on Mar 11

Commit

a30c589

unverified ·

1 Parent(s): 001e605

fix(llm): nettoyage debug + robustesse réponse Mistral SDK

Supprime tous les logs temporaires de diagnostic (ENTRY TRACE, DEBUG
verbose) ajoutés lors des sprints de debug précédents.

mistral_adapter.py :
- Suppression de 60 lignes de logs DEBUG/INFO temporaires
- Ajout d'un guard pour content de type list (certaines versions du SDK
mistralai retournent une liste de ContentChunk au lieu d'un str)
- Conservation des logs utiles en production : appel, réponse, erreurs

pipelines/base.py :
- Suppression des ENTRY TRACE
- L'exception dans run() est maintenant loguée en WARNING (pas INFO)

engines/base.py :
- Suppression du ENTRY TRACE et de l'import logging inutile

runner.py :
- Suppression du ENTRY TRACE dans _io_doc_worker

36 tests OK (test_sprint15 + test_engines).

https://claude.ai/code/session_017gXea9mxBQqDTAsSQd7aAq

Files changed (4) hide show

picarones/core/runner.py +0 -5
picarones/engines/base.py +0 -8
picarones/llm/mistral_adapter.py +24 -62
picarones/pipelines/base.py +3 -14

picarones/core/runner.py CHANGED Viewed

@@ -71,11 +71,6 @@ def _io_doc_worker(
     moteur est partagée entre les threads — les adaptateurs HTTP sont
     généralement sans état mutable entre les appels.
     """
-    # ENTRY TRACE — confirme que _io_doc_worker est bien exécuté et quelle classe est appelée
-    logger.info(
-        "[runner-ENTRY] _io_doc_worker — classe=%s, doc=%s",
-        engine.__class__.__name__, getattr(doc, "doc_id", "?"),
-    )
     ocr_result = engine.run(doc.image_path)  # type: ignore[attr-defined]
     return _compute_document_result(
         doc_id=doc.doc_id,  # type: ignore[attr-defined]

     moteur est partagée entre les threads — les adaptateurs HTTP sont
     généralement sans état mutable entre les appels.
     """
     ocr_result = engine.run(doc.image_path)  # type: ignore[attr-defined]
     return _compute_document_result(
         doc_id=doc.doc_id,  # type: ignore[attr-defined]

picarones/engines/base.py CHANGED Viewed

@@ -3,15 +3,12 @@
 from __future__ import annotations
 import hashlib
-import logging
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional
-_base_logger = logging.getLogger(__name__)
 @dataclass
 class EngineResult:
@@ -71,11 +68,6 @@ class BaseOCREngine(ABC):
     def run(self, image_path: str | Path) -> EngineResult:
         """Point d'entrée public : exécute l'OCR et mesure le temps d'exécution."""
         image_path = Path(image_path)
-        # ENTRY TRACE — confirme quel moteur/classe est réellement exécuté
-        _base_logger.info(
-            "[BaseOCREngine-ENTRY] run() — classe=%s, doc=%s",
-            self.__class__.__name__, image_path.name,
-        )
         start = time.perf_counter()
         try:
             text = self._run_ocr(image_path)

 from __future__ import annotations
 import hashlib
 import time
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional
 @dataclass
 class EngineResult:
     def run(self, image_path: str | Path) -> EngineResult:
         """Point d'entrée public : exécute l'OCR et mesure le temps d'exécution."""
         image_path = Path(image_path)
         start = time.perf_counter()
         try:
             text = self._run_ocr(image_path)

picarones/llm/mistral_adapter.py CHANGED Viewed

@@ -93,15 +93,9 @@ class MistralAdapter(BaseLLMAdapter):
         else:
             content = prompt
-        # INFO — longueur du texte OCR reçu (visible niveau INFO)
         logger.info(
-            "[MistralAdapter] texte OCR reçu : %d chars (modèle=%s, image=%s)",
-            len(prompt), self.model, "oui" if image_b64 else "non",
-        )
-        # DEBUG — prompt complet tronqué à 200 chars
-        logger.debug(
-            "[MistralAdapter] DEBUG prompt (200 premiers chars) : %r",
-            prompt[:200],
         )
         try:
@@ -113,11 +107,6 @@ class MistralAdapter(BaseLLMAdapter):
             )
         except Exception as exc:
             status_code = getattr(exc, "status_code", None) or getattr(exc, "http_status", None)
-            # DEBUG — statut HTTP en cas d'erreur
-            logger.debug(
-                "[MistralAdapter] DEBUG exception type=%s status_code=%s message=%s",
-                type(exc).__name__, status_code, exc,
-            )
             if status_code == 401:
                 logger.warning(
                     "[MistralAdapter] erreur HTTP 401 — clé API invalide ou expirée "
@@ -143,24 +132,9 @@ class MistralAdapter(BaseLLMAdapter):
                 )
             raise
-        # DEBUG — choices complètes (visible niveau DEBUG uniquement)
-        try:
-            choices_debug = [
-                {
-                    "index": c.index,
-                    "finish_reason": c.finish_reason,
-                    "content_type": type(c.message.content).__name__ if c.message else None,
-                    "content_len": len(c.message.content) if c.message and c.message.content else 0,
-                }
-                for c in (response.choices or [])
-            ]
-        except Exception as _exc:  # noqa: BLE001
-            choices_debug = f"<erreur sérialisation choices : {_exc}>"
-        logger.debug("[MistralAdapter] DEBUG response.choices : %s", choices_debug)
         if not response.choices:
             logger.warning(
-                "[MistralAdapter] DEBUG response.choices est vide — modèle=%s.",
                 self.model,
             )
             return ""
@@ -168,44 +142,32 @@ class MistralAdapter(BaseLLMAdapter):
         _choice = response.choices[0]
         raw = _choice.message.content
         _finish_reason = _choice.finish_reason
-        _content_len = len(raw) if raw else 0
-        # INFO — statut réponse API : finish_reason + content_len (visible niveau INFO)
-        logger.info(
-            "[MistralAdapter] réponse : finish_reason=%s, content_len=%d",
-            _finish_reason, _content_len,
-        )
-        # DEBUG — valeur brute avant retour
-        logger.debug(
-            "[MistralAdapter] DEBUG choices[0].message.content type=%s valeur=%r",
-            type(raw).__name__,
-            raw[:200] if isinstance(raw, str) else raw,
-        )
         text = raw or ""
-        if not text or not text.strip():
-            _completion_tokens = "?"
-            if hasattr(response, "usage") and response.usage:
-                _completion_tokens = getattr(response.usage, "completion_tokens", "?")
-            # INFO — contenu vide avec completion_tokens pour diagnostic (visible niveau INFO)
-            logger.info(
-                "[MistralAdapter] WARNING contenu vide — completion_tokens=%s "
-                "(modèle=%s, finish_reason=%s)",
-                _completion_tokens, self.model, _finish_reason,
-            )
             logger.warning(
-                "[MistralAdapter] réponse vide reçue du modèle '%s' "
-                "(longueur brute : %s). "
-                "Vérifier que le modèle supporte l'API chat/completions et "
-                "que le prompt contient bien {ocr_output}.",
-                self.model, len(raw) if raw is not None else "None",
-            )
-        else:
-            logger.debug(
-                "[MistralAdapter] réponse reçue — %d caractères, extrait : %r",
-                len(text), text[:120],
             )
         return text

         else:
             content = prompt
         logger.info(
+            "[MistralAdapter] appel %s — prompt=%d chars, image=%s",
+            self.model, len(prompt), "oui" if image_b64 else "non",
         )
         try:
             )
         except Exception as exc:
             status_code = getattr(exc, "status_code", None) or getattr(exc, "http_status", None)
             if status_code == 401:
                 logger.warning(
                     "[MistralAdapter] erreur HTTP 401 — clé API invalide ou expirée "
                 )
             raise
         if not response.choices:
             logger.warning(
+                "[MistralAdapter] response.choices vide (modèle=%s).",
                 self.model,
             )
             return ""
         _choice = response.choices[0]
         raw = _choice.message.content
         _finish_reason = _choice.finish_reason
+        # Le SDK mistralai peut retourner une liste de ContentChunk au lieu
+        # d'une chaîne pour certains modèles/versions.  Normaliser en str.
+        if isinstance(raw, list):
+            raw = "".join(
+                chunk.text if hasattr(chunk, "text") else str(chunk)
+                for chunk in raw
+            )
         text = raw or ""
+        _completion_tokens = None
+        if hasattr(response, "usage") and response.usage:
+            _completion_tokens = getattr(response.usage, "completion_tokens", None)
+        logger.info(
+            "[MistralAdapter] réponse %s — finish_reason=%s, len=%d, tokens=%s",
+            self.model, _finish_reason, len(text), _completion_tokens,
+        )
+        if not text.strip():
             logger.warning(
+                "[MistralAdapter] réponse vide du modèle '%s' "
+                "(finish_reason=%s, completion_tokens=%s). "
+                "Vérifier le prompt et la compatibilité du modèle.",
+                self.model, _finish_reason, _completion_tokens,
             )
         return text

picarones/pipelines/base.py CHANGED Viewed

@@ -143,11 +143,6 @@ class OCRLLMPipeline(BaseOCREngine):
     def _run_ocr(self, image_path: Path) -> str:
         """Logique interne du pipeline — appelée par ``run()``."""
-        # ENTRY TRACE — confirme que _run_ocr() est atteint après run()
-        logger.info(
-            "[Pipeline-ENTRY] _run_ocr() appelé — doc=%s, mode=%s",
-            image_path.name, self.mode.value,
-        )
         self._last_ocr_text = None
         ocr_text = ""
@@ -237,11 +232,6 @@ class OCRLLMPipeline(BaseOCREngine):
     def run(self, image_path: str | Path) -> EngineResult:
         """Exécute le pipeline et retourne un EngineResult enrichi de métadonnées."""
         image_path = Path(image_path)
-        # ENTRY TRACE — confirme que OCRLLMPipeline.run() est bien la méthode exécutée
-        logger.info(
-            "[Pipeline-ENTRY] OCRLLMPipeline.run() appelé — doc=%s, mode=%s, llm=%s",
-            image_path.name, self.mode.value, self.llm_adapter.model,
-        )
         self._last_ocr_text = None
         start = time.perf_counter()
@@ -251,10 +241,9 @@ class OCRLLMPipeline(BaseOCREngine):
         except Exception as exc:  # noqa: BLE001
             text = ""
             error = str(exc)
-            # INFO — exception capturée avant ou pendant l'appel LLM (visible niveau INFO)
-            logger.info(
-                "[Pipeline] EXCEPTION capturée pour doc %s : %s: %s",
-                image_path.name, type(exc).__name__, exc,
             )
         duration = time.perf_counter() - start

     def _run_ocr(self, image_path: Path) -> str:
         """Logique interne du pipeline — appelée par ``run()``."""
         self._last_ocr_text = None
         ocr_text = ""
     def run(self, image_path: str | Path) -> EngineResult:
         """Exécute le pipeline et retourne un EngineResult enrichi de métadonnées."""
         image_path = Path(image_path)
         self._last_ocr_text = None
         start = time.perf_counter()
         except Exception as exc:  # noqa: BLE001
             text = ""
             error = str(exc)
+            logger.warning(
+                "[%s] erreur pipeline pour '%s' : %s",
+                self._name, image_path.name, exc,
             )
         duration = time.perf_counter() - start