Marylene committed on
Commit
d2e8069
·
verified ·
1 Parent(s): a0d1aaa

Update quick_deploy_agent.py

Browse files
Files changed (1) hide show
  1. quick_deploy_agent.py +351 -425
quick_deploy_agent.py CHANGED
@@ -1,426 +1,352 @@
1
- # quickstart_agent.py
2
- from __future__ import annotations
3
- import json, re, unicodedata, ast
4
- from typing import List, Dict, Any, Optional
5
- import requests
6
- from smolagents import Tool, CodeAgent, InferenceClientModel
7
- from sentence_transformers import SentenceTransformer, util
8
-
9
- # ---- Mini référentiel COICOP (démo) ----
10
- COICOP_ITEMS = [
11
- {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
12
- {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
13
- {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
14
- {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
15
- {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
16
- {"code": "01.1.1.4", "label": "Pain"},
17
- {"code": "01.1.1.1", "label": "Riz"},
18
- {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
19
- ]
20
-
21
- def normalize_txt(s: str) -> str:
22
- if not s: return ""
23
- s = s.upper()
24
- s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
25
- s = re.sub(r"[^A-Z0-9% ]+", " ", s)
26
- s = re.sub(r"\s+", " ", s).strip()
27
- return s
28
-
29
- def ean_check_digit_ok(ean: str) -> bool:
30
- digits = re.sub(r"\D", "", ean)
31
- if len(digits) not in (8, 12, 13, 14): return False
32
- total = 0
33
- for i, ch in enumerate(reversed(digits[:-1]), start=1):
34
- n = int(ch); total += n * (3 if i % 2 == 1 else 1)
35
- check = (10 - (total % 10)) % 10
36
- return check == int(digits[-1])
37
-
38
- # ---- ValidateEANTool : tout en local dans forward ----
39
- class ValidateEANTool(Tool):
40
- name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
41
- inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
42
- output_type = "dict" # <— (facultatif)
43
-
44
- def forward(self, ean: str):
45
- import re
46
- digits = re.sub(r"\D", "", ean or "")
47
- if len(digits) not in (8, 12, 13, 14):
48
- return {"valid": False, "normalized": digits}
49
- total = 0
50
- for i, ch in enumerate(reversed(digits[:-1]), start=1):
51
- n = int(ch); total += n * (3 if i % 2 == 1 else 1)
52
- check = (10 - (total % 10)) % 10
53
- return {"valid": check == int(digits[-1]), "normalized": digits}
54
-
55
- # ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
56
- class OFFtoCOICOP(Tool):
57
- name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP (off_payload ou champs séparés)."
58
- inputs = {
59
- "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
60
- "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
61
- "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
62
- # 👇 NOUVEAU : on peut passer directement la chaîne renvoyée par openfoodfacts_product_by_ean
63
- "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
64
- }
65
- output_type="string"
66
-
67
- # --- utilitaires locaux (pas d'import global pour faciliter l'export Hub) ---
68
- import re as _re, json as _json, ast as _ast
69
- def _normalize_txt(self, s: str) -> str:
70
- import unicodedata, re
71
- if not s: return ""
72
- s = s.upper()
73
- s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
74
- s = re.sub(r"[^A-Z0-9% ]+", " ", s)
75
- return re.sub(r"\s+", " ", s).strip()
76
- def _to_list(self, x):
77
- import re
78
- if x is None: return []
79
- if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
80
- if isinstance(x, str):
81
- return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
82
- return [str(x).strip()]
83
- def _safe_parse(self, s):
84
- try:
85
- return self._json.loads(s)
86
- except Exception:
87
- try:
88
- return self._ast.literal_eval(s)
89
- except Exception:
90
- return {}
91
-
92
- # --- règles inchangées ---
93
- RULES = [
94
- (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"),
95
- ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
96
- (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"),
97
- ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
98
- (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"),
99
- ("01.1.4.5.4",0.90,"OFF: chèvre")),
100
- (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"),
101
- ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
102
- ]
103
-
104
- def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None) -> str:
105
- import json
106
- # 1) Si off_payload est fourni, on parse et on remplit les champs
107
- if off_payload and not (product_name or categories_tags or ingredients_text):
108
- data = self._safe_parse(off_payload) or {}
109
- # supporte aussi notre OFFByEAN normalisé (ok/product_name/…)
110
- p_name = data.get("product_name")
111
- c_tags = data.get("categories_tags")
112
- ingr = data.get("ingredients_text")
113
- product_name = p_name if isinstance(p_name, str) else ""
114
- categories_tags = self._to_list(c_tags)
115
- ingredients_text= ingr if isinstance(ingr, str) else ""
116
-
117
- # 2) Normalisation texte
118
- text = " ".join([t for t in [
119
- self._normalize_txt(product_name or ""),
120
- self._normalize_txt(" ".join(self._to_list(categories_tags))),
121
- self._normalize_txt(ingredients_text or "")
122
- ] if t])
123
-
124
- # 3) Application des règles
125
- c=[]
126
- for rx,(code,score,why) in self.RULES:
127
- if rx.search(text): c.append({"code":code,"why":why,"score":score})
128
-
129
- return json.dumps({"candidates": c})
130
-
131
- # ---- OFFByEAN : robuste + sortie normalisée + step3_inputs ----
132
- class OFFByEAN(Tool):
133
- name = "openfoodfacts_product_by_ean"
134
- description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
135
- inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
136
- output_type = "dict"
137
- requirements = ["requests"]
138
-
139
- def forward(self, ean: str):
140
- import re, requests
141
- from requests.adapters import HTTPAdapter
142
- try:
143
- from urllib3.util.retry import Retry
144
- except Exception:
145
- Retry = None
146
-
147
- def _to_list(x):
148
- if x is None: return []
149
- if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
150
- if isinstance(x, str):
151
- return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
152
- return [str(x).strip()]
153
-
154
- def _first(*vals):
155
- for v in vals:
156
- if isinstance(v, str) and v.strip(): return v.strip()
157
- return ""
158
-
159
- code = re.sub(r"\D", "", ean or "")
160
- if not code:
161
- return {"ok": False, "status": 0, "code": "", "error": "EAN vide"}
162
-
163
- sess = requests.Session()
164
- sess.headers.update({"User-Agent":"insee-coicop-agent/1.0","Accept":"application/json"})
165
- if Retry:
166
- retry = Retry(total=3, backoff_factor=0.5, status_forcelist=[429,500,502,503,504],
167
- allowed_methods=frozenset(["GET"]), raise_on_status=False)
168
- sess.mount("https://", HTTPAdapter(max_retries=retry))
169
-
170
- urls = [
171
- f"https://world.openfoodfacts.org/api/v0/product/{code}.json",
172
- "https://world.openfoodfacts.org/api/v2/product/"
173
- f"{code}?lc=fr&fields=code,product_name,product_name_fr,brands,"
174
- "categories_tags,categories_tags_fr,ingredients_text,ingredients_text_fr,"
175
- "stores,status,status_verbose",
176
- f"https://world.openfoodfacts.net/api/v0/product/{code}.json",
177
- ]
178
-
179
- last_err = None
180
- for u in urls:
181
- try:
182
- r = sess.get(u, timeout=15)
183
- if not r.ok:
184
- last_err = f"HTTP {r.status_code}"
185
- continue
186
- data = r.json()
187
- product = data.get("product")
188
- status = data.get("status", 1 if product else 0)
189
- if status == 1 or product:
190
- p = product or {}
191
- product_name = _first(p.get("product_name_fr"), p.get("product_name"))
192
- categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
193
- categories_tags = _to_list(categories_tags)
194
- ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
195
- brands = _first(p.get("brands"), None)
196
- stores = _first(p.get("stores"), None)
197
- return {
198
- "ok": True, "status": status, "status_verbose": data.get("status_verbose"),
199
- "code": code, "used_url": u,
200
- "product_name": product_name,
201
- "categories_tags": categories_tags,
202
- "ingredients_text": ingredients_text,
203
- "brands": brands, "brands_list": _to_list(brands),
204
- "stores": stores, "stores_list": _to_list(stores),
205
- "step3_inputs": {
206
- "product_name": product_name,
207
- "categories_tags": categories_tags,
208
- "ingredients_text": ingredients_text,
209
- },
210
- }
211
- except Exception as e:
212
- last_err = str(e)
213
-
214
- return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
215
-
216
-
217
-
218
- # ---- RegexCOICOP : normalisation locale + regex précompilées ----
219
- class RegexCOICOP(Tool):
220
- name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
221
- inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
222
- output_type = "dict"
223
-
224
- import re as _re
225
- SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
226
- PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
227
- GOAT = _re.compile(r"(?:\b|^)(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)(?:\b|$)")
228
- PROC = _re.compile(r"(?:\b|^)(FONDU(?:ES?)?|FROMAGE FONDU|TOASTINETTES?|VACHE QUI RIT|KIRI|CARRE FRAIS|CARR[ÉE] FRAIS|PORTIONS?)(?:\b|$)|\bRAP[ÉE]?\b")
229
-
230
- @staticmethod
231
- def _normalize_txt(s: str) -> str:
232
- import unicodedata, re
233
- if not s: return ""
234
- s = s.upper()
235
- s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
236
- s = re.sub(r"[^A-Z0-9% ]+", " ", s)
237
- return re.sub(r"\s+", " ", s).strip()
238
-
239
- def forward(self, text: str):
240
- import re
241
- s = self._normalize_txt(text); c=[]
242
- if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
243
- if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
244
- if self.GOAT.search(s): c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
245
- if self.PROC.search(s): c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
246
- if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
247
- return {"candidates": c}
248
-
249
-
250
- # ---- OFFtoCOICOP : normalisation locale + regex règles ----
251
- class OFFtoCOICOP(Tool):
252
- name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP (off_payload ou champs séparés)."
253
- inputs = {
254
- "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
255
- "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
256
- "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
257
- "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
258
- }
259
- output_type="dict"
260
-
261
- import re as _re, json as _json, ast as _ast
262
- def _normalize_txt(self, s: str) -> str:
263
- import unicodedata, re
264
- if not s: return ""
265
- s = s.upper()
266
- s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
267
- s = re.sub(r"[^A-Z0-9% ]+", " ", s)
268
- return re.sub(r"\s+", " ", s).strip()
269
- def _to_list(self, x):
270
- import re
271
- if x is None: return []
272
- if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
273
- if isinstance(x, str): return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
274
- return [str(x).strip()]
275
- def _safe_parse(self, s):
276
- try: return self._json.loads(s)
277
- except Exception:
278
- try: return self._ast.literal_eval(s)
279
- except Exception: return {}
280
-
281
- RULES = [
282
- (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
283
- (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
284
- (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
285
- (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
286
- ]
287
-
288
- def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None):
289
- if off_payload and not (product_name or categories_tags or ingredients_text):
290
- data = self._safe_parse(off_payload) or {}
291
- product_name = data.get("product_name") or ""
292
- categories_tags = self._to_list(data.get("categories_tags"))
293
- ingredients_text= data.get("ingredients_text") or ""
294
-
295
- text = " ".join([t for t in [
296
- self._normalize_txt(product_name or ""),
297
- self._normalize_txt(" ".join(self._to_list(categories_tags))),
298
- self._normalize_txt(ingredients_text or "")
299
- ] if t])
300
-
301
- c=[]
302
- for rx,(code,score,why) in self.RULES:
303
- if rx.search(text): c.append({"code":code,"why":why,"score":score})
304
-
305
- return {"candidates": c}
306
-
307
-
308
- # ---- SemSim : COICOP embarqué + import lazy du modèle ----
309
- class SemSim(Tool):
310
- name, description = "coicop_semantic_similarity", "Embeddings top-k COICOP."
311
- inputs = {"text":{"type":"string","description":"Texte libellé"},
312
- "topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
313
- output_type = "dict"
314
- requirements = ["sentence_transformers", "torch"]
315
-
316
- COICOP_ITEMS = [
317
- {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
318
- {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
319
- {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
320
- {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
321
- {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
322
- {"code": "01.1.1.4", "label": "Pain"},
323
- {"code": "01.1.1.1", "label": "Riz"},
324
- {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
325
- ]
326
-
327
- @staticmethod
328
- def _normalize_txt(s: str) -> str:
329
- import unicodedata, re
330
- if not s: return ""
331
- s = s.upper()
332
- s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
333
- s = re.sub(r"[^A-Z0-9% ]+", " ", s)
334
- return re.sub(r"\s+", " ", s).strip()
335
-
336
- def forward(self, text: str, topk: int = 5):
337
- from sentence_transformers import SentenceTransformer, util
338
- if not hasattr(self, "_model"):
339
- self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
340
- q = self._normalize_txt(text)
341
- q_emb = self._model.encode([q], normalize_embeddings=True)
342
- labels = [f"{it['code']} {it['label']}" for it in self.COICOP_ITEMS]
343
- L = self._model.encode(labels, normalize_embeddings=True)
344
- sims = util.cos_sim(q_emb, L).tolist()[0]
345
- ranked = sorted(
346
- [{"code": self.COICOP_ITEMS[i]["code"], "label": self.COICOP_ITEMS[i]["label"], "score": float(sims[i])}
347
- for i in range(len(self.COICOP_ITEMS))],
348
- key=lambda x: x["score"], reverse=True
349
- )
350
- return {"candidates": ranked[:max(1,int(topk))]}
351
-
352
-
353
- # ---- Resolve : import local json ----
354
- class Resolve(Tool):
355
- name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
356
- inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str/dict) d'autres tools."},
357
- "topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
358
- output_type = "dict"
359
-
360
- def forward(self, json_lists, topn: int = 3):
361
- # accepter listes de strings JSON OU de dicts
362
- import json
363
- from typing import Dict, Any
364
- bucket: Dict[str, Dict[str, Any]] = {}
365
- for s in json_lists:
366
- data = s
367
- if isinstance(s, str):
368
- try: data = json.loads(s)
369
- except Exception: data = {}
370
- if not isinstance(data, dict): continue
371
- for c in data.get("candidates", []):
372
- code = c["code"]; score = float(c.get("score", 0.0))
373
- why = c.get("why", "") or c.get("label", "")
374
- if code not in bucket:
375
- bucket[code] = {"code":code,"score":score,"votes":1,"evidences":[why] if why else []}
376
- else:
377
- bucket[code]["score"] = max(bucket[code]["score"], score)
378
- bucket[code]["votes"] += 1
379
- if why: bucket[code]["evidences"].append(why)
380
- for v in bucket.values():
381
- v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
382
- ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
383
- if not ranked: return {"final": None, "alternatives": [], "explanation":"Aucun candidat"}
384
- final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
385
- exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
386
- return {"final": final, "alternatives": alts, "explanation": exp}
387
-
388
-
389
-
390
- def build_agent(model_id: str | None = None) -> CodeAgent:
391
- model_id = model_id or "Qwen/Qwen2.5-Coder-7B-Instruct" # léger pour tester
392
- agent = CodeAgent(
393
- tools=[ValidateEANTool(), OFFByEAN(), RegexCOICOP(), OFFtoCOICOP(), SemSim(), Resolve()],
394
- model=InferenceClientModel(model_id=model_id),
395
- add_base_tools=False,
396
- max_steps=6,
397
- verbosity_level=2,
398
- )
399
- return agent
400
-
401
- def parse_result(res):
402
- if isinstance(res, dict): return res
403
- try: return ast.literal_eval(res)
404
- except Exception: return {"raw": res}
405
-
406
- if __name__ == "__main__":
407
- # Remplace par les vraies données si possible - uniquement du test
408
- ean = "3256221112345" # EAN fictif (peut ne pas exister sur OFF)
409
- label = "Camembert au lait cru AOP 250g - ALDI"
410
-
411
- agent = build_agent()
412
- task = f"""
413
- Classe ce produit en COICOP:
414
- EAN: {ean}
415
- Libellé: {label}
416
- Pipeline:
417
- 1) validate_ean(ean)
418
- 2) openfoodfacts_product_by_ean(ean) # si OFF ne trouve pas, on s'appuie sur regex + embeddings
419
- 3) map_off_to_coicop(product_name, categories_tags, ingredients_text)
420
- 4) coicop_regex_rules(text=libellé)
421
- 5) coicop_semantic_similarity(text=libellé, topk=5)
422
- 6) resolve_coicop_candidates([...], topn=3)
423
- Attend un JSON final.
424
- """
425
- out = agent.run(task)
426
  print(parse_result(out))
 
1
+ # quickstart_agent.py
2
+ from __future__ import annotations
3
+ import json, re, unicodedata, ast
4
+ from typing import List, Dict, Any, Optional
5
+ import requests
6
+ from smolagents import Tool, CodeAgent, InferenceClientModel
7
+ from sentence_transformers import SentenceTransformer, util
8
+
9
# ---- Mini COICOP reference (demo subset) ----
# Embedded excerpt of the COICOP nomenclature shared by the demo tools;
# each entry maps a COICOP code to its French label.
COICOP_ITEMS = [
    {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
    {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
    {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
    {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
    {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
    {"code": "01.1.1.4", "label": "Pain"},
    {"code": "01.1.1.1", "label": "Riz"},
    {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
]
20
+
21
def normalize_txt(s: str) -> str:
    """Uppercase *s*, strip accents (NFD), keep only A-Z/0-9/%/space, collapse runs of whitespace."""
    if not s:
        return ""
    decomposed = unicodedata.normalize("NFD", s.upper())
    ascii_only = "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")
    cleaned = re.sub(r"[^A-Z0-9% ]+", " ", ascii_only)
    return re.sub(r"\s+", " ", cleaned).strip()
28
+
29
def ean_check_digit_ok(ean: str) -> bool:
    """Return True if *ean* is a GTIN-8/12/13/14 with a valid GS1 check digit.

    Non-digit characters are stripped before validation; None or empty input
    returns False instead of raising (consistent with ValidateEANTool.forward,
    which also guards with ``ean or ""``).
    """
    digits = re.sub(r"\D", "", ean or "")
    if len(digits) not in (8, 12, 13, 14):
        return False
    total = 0
    # GS1 weighting: alternate 3/1 starting from the digit next to the check digit.
    for i, ch in enumerate(reversed(digits[:-1]), start=1):
        total += int(ch) * (3 if i % 2 == 1 else 1)
    check = (10 - (total % 10)) % 10
    return check == int(digits[-1])
37
+
38
# ---- ValidateEANTool: fully self-contained inside forward ----
class ValidateEANTool(Tool):
    """Validates an EAN/GTIN barcode via its GS1 check digit."""

    name = "validate_ean"
    description = "Valide un EAN/GTIN (clé GS1)."
    inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
    output_type = "dict"  # <— (optional)

    def forward(self, ean: str):
        import re

        digits = re.sub(r"\D", "", ean or "")
        if len(digits) not in (8, 12, 13, 14):
            return {"valid": False, "normalized": digits}
        # GS1 check digit: alternating 3/1 weights from the rightmost data digit.
        weighted = sum(
            int(d) * (3 if pos % 2 == 1 else 1)
            for pos, d in enumerate(reversed(digits[:-1]), start=1)
        )
        expected = (10 - weighted % 10) % 10
        return {"valid": expected == int(digits[-1]), "normalized": digits}
54
+
55
+ # ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
56
+
57
# ---- OFFByEAN: robust fetch + normalized output + step3_inputs ----
class OFFByEAN(Tool):
    # Queries Open Food Facts for a product by EAN, trying several API
    # endpoints/mirrors in order, and returns a normalized dict (including a
    # pre-bundled "step3_inputs" payload for the COICOP mapping step).
    name = "openfoodfacts_product_by_ean"
    description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
    inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
    output_type = "dict"
    requirements = ["requests"]

    def forward(self, ean: str):
        # Local imports so the tool stays self-contained when exported to the Hub.
        import re, requests
        from requests.adapters import HTTPAdapter
        try:
            from urllib3.util.retry import Retry
        except Exception:
            Retry = None  # retries silently disabled if urllib3's Retry is unavailable

        def _to_list(x):
            # Coerce None / str / list into a clean list of non-empty strings.
            if x is None: return []
            if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
            if isinstance(x, str):
                return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
            return [str(x).strip()]

        def _first(*vals):
            # First non-blank string among vals, else "".
            for v in vals:
                if isinstance(v, str) and v.strip(): return v.strip()
            return ""

        code = re.sub(r"\D", "", ean or "")
        if not code:
            return {"ok": False, "status": 0, "code": "", "error": "EAN vide"}

        sess = requests.Session()
        sess.headers.update({"User-Agent":"insee-coicop-agent/1.0","Accept":"application/json"})
        if Retry:
            # Retry transient failures (rate limiting / 5xx) on GET requests only.
            retry = Retry(total=3, backoff_factor=0.5, status_forcelist=[429,500,502,503,504],
                          allowed_methods=frozenset(["GET"]), raise_on_status=False)
            sess.mount("https://", HTTPAdapter(max_retries=retry))

        # Endpoints tried in order: v0, then v2 (field-filtered, French locale),
        # then the .net mirror as a last resort.
        urls = [
            f"https://world.openfoodfacts.org/api/v0/product/{code}.json",
            "https://world.openfoodfacts.org/api/v2/product/"
            f"{code}?lc=fr&fields=code,product_name,product_name_fr,brands,"
            "categories_tags,categories_tags_fr,ingredients_text,ingredients_text_fr,"
            "stores,status,status_verbose",
            f"https://world.openfoodfacts.net/api/v0/product/{code}.json",
        ]

        last_err = None
        for u in urls:
            try:
                r = sess.get(u, timeout=15)
                if not r.ok:
                    last_err = f"HTTP {r.status_code}"
                    continue
                data = r.json()
                product = data.get("product")
                # v2 responses may omit "status"; infer it from product presence.
                status = data.get("status", 1 if product else 0)
                if status == 1 or product:
                    p = product or {}
                    # Prefer the French field, fall back to the generic one.
                    product_name = _first(p.get("product_name_fr"), p.get("product_name"))
                    categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
                    categories_tags = _to_list(categories_tags)
                    ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
                    brands = _first(p.get("brands"), None)
                    stores = _first(p.get("stores"), None)
                    return {
                        "ok": True, "status": status, "status_verbose": data.get("status_verbose"),
                        "code": code, "used_url": u,
                        "product_name": product_name,
                        "categories_tags": categories_tags,
                        "ingredients_text": ingredients_text,
                        "brands": brands, "brands_list": _to_list(brands),
                        "stores": stores, "stores_list": _to_list(stores),
                        # Ready-to-use kwargs for map_off_to_coicop (pipeline step 3).
                        "step3_inputs": {
                            "product_name": product_name,
                            "categories_tags": categories_tags,
                            "ingredients_text": ingredients_text,
                        },
                    }
            except Exception as e:
                last_err = str(e)  # remember the failure and try the next endpoint

        return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
141
+
142
+
143
+
144
# ---- RegexCOICOP: local normalization + precompiled regexes ----
class RegexCOICOP(Tool):
    """Applies keyword regex rules to a free-text label to produce COICOP candidates."""

    name = "coicop_regex_rules"
    description = "Règles regex → candidats COICOP."
    inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
    output_type = "dict"

    # Patterns compiled once at class-definition time.
    import re as _re
    SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
    PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
    GOAT = _re.compile(r"(?:\b|^)(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)(?:\b|$)")
    PROC = _re.compile(r"(?:\b|^)(FONDU(?:ES?)?|FROMAGE FONDU|TOASTINETTES?|VACHE QUI RIT|KIRI|CARRE FRAIS|CARR[ÉE] FRAIS|PORTIONS?)(?:\b|$)|\bRAP[ÉE]?\b")

    @staticmethod
    def _normalize_txt(s: str) -> str:
        """Uppercase, de-accent (NFD) and keep only A-Z/0-9/%/space."""
        import unicodedata, re
        if not s:
            return ""
        stripped = "".join(
            ch for ch in unicodedata.normalize("NFD", s.upper())
            if unicodedata.category(ch) != "Mn"
        )
        stripped = re.sub(r"[^A-Z0-9% ]+", " ", stripped)
        return re.sub(r"\s+", " ", stripped).strip()

    def forward(self, text: str):
        import re
        norm = self._normalize_txt(text)
        # (pattern, COICOP code, reason, confidence) — evaluated in this order.
        rules = (
            (self.SOFT, "01.1.4.5.2", "pâte molle/persillée", 0.95),
            (self.PRESS, "01.1.4.5.3", "pâte pressée", 0.90),
            (self.GOAT, "01.1.4.5.4", "chèvre", 0.90),
            (self.PROC, "01.1.4.5.5", "fondu/râpé/portions", 0.85),
        )
        candidates = [
            {"code": code, "why": why, "score": score}
            for rx, code, why, score in rules
            if rx.search(norm)
        ]
        # Generic fallback when nothing specific matched but it is clearly cheese.
        if not candidates and re.search(r"\bFROMAGE\b", norm):
            candidates.append({"code": "01.1.4.5", "why": "générique fromage/laits caillés", "score": 0.6})
        return {"candidates": candidates}
174
+
175
+
176
# ---- OFFtoCOICOP: local normalization + regex rules ----
class OFFtoCOICOP(Tool):
    # Maps Open Food Facts fields (or the raw JSON payload returned by step 2)
    # to COICOP candidate codes via keyword regex rules.
    name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP (off_payload ou champs séparés)."
    inputs = {
        "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
        "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
        "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
        "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
    }
    output_type="dict"

    # Modules bound as class attributes (self._json etc.) so the tool remains
    # self-contained when exported to the Hub — no module-level imports needed.
    import re as _re, json as _json, ast as _ast
    def _normalize_txt(self, s: str) -> str:
        # Uppercase, strip accents (NFD), keep only A-Z/0-9/%/space.
        import unicodedata, re
        if not s: return ""
        s = s.upper()
        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
        return re.sub(r"\s+", " ", s).strip()
    def _to_list(self, x):
        # Coerce None / str / list into a list of non-empty strings.
        import re
        if x is None: return []
        if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
        if isinstance(x, str): return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
        return [str(x).strip()]
    def _safe_parse(self, s):
        # Parse as JSON first, then as a Python literal; {} on total failure.
        try: return self._json.loads(s)
        except Exception:
            try: return self._ast.literal_eval(s)
            except Exception: return {}

    # (compiled pattern, (COICOP code, confidence score, human-readable reason))
    RULES = [
        (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
        (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
        (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
        (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
    ]

    def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None):
        # When only the raw payload is supplied, extract the individual fields from it.
        if off_payload and not (product_name or categories_tags or ingredients_text):
            data = self._safe_parse(off_payload) or {}
            product_name = data.get("product_name") or ""
            categories_tags = self._to_list(data.get("categories_tags"))
            ingredients_text= data.get("ingredients_text") or ""

        # Normalize and concatenate every available text field into one haystack.
        text = " ".join([t for t in [
            self._normalize_txt(product_name or ""),
            self._normalize_txt(" ".join(self._to_list(categories_tags))),
            self._normalize_txt(ingredients_text or "")
        ] if t])

        # Each matching rule contributes one candidate.
        c=[]
        for rx,(code,score,why) in self.RULES:
            if rx.search(text): c.append({"code":code,"why":why,"score":score})

        return {"candidates": c}
232
+
233
+
234
# ---- SemSim: embedded COICOP list + lazy model import ----
class SemSim(Tool):
    """Sentence-embedding similarity between a label and the COICOP reference -> top-k candidates."""

    name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
    inputs = {"text":{"type":"string","description":"Texte libellé"},
              "topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
    output_type = "dict"
    requirements = ["sentence_transformers", "torch"]

    # Embedded COICOP excerpt so the tool is self-contained when exported.
    COICOP_ITEMS = [
        {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
        {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
        {"code": "01.1.4.5.3", "label": "Fromage à pâte pressée"},
        {"code": "01.1.4.5.4", "label": "Fromage de chèvre"},
        {"code": "01.1.4.5.5", "label": "Fromages fondus, râpés, portions"},
        {"code": "01.1.1.4", "label": "Pain"},
        {"code": "01.1.1.1", "label": "Riz"},
        {"code": "01.1.1.3", "label": "Pâtes, couscous et produits similaires"},
    ]

    @staticmethod
    def _normalize_txt(s: str) -> str:
        """Uppercase, de-accent (NFD) and keep only A-Z/0-9/%/space."""
        import unicodedata, re
        if not s: return ""
        s = s.upper()
        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
        return re.sub(r"\s+", " ", s).strip()

    def forward(self, text: str, topk: int = 5):
        """Return the top-k COICOP items ranked by cosine similarity to *text*.

        The model AND the reference-label embeddings are computed lazily on the
        first call and cached: the labels never change, so re-encoding them on
        every call (as before) was pure wasted work.
        """
        from sentence_transformers import SentenceTransformer, util
        if not hasattr(self, "_model"):
            self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
            labels = [f"{it['code']} {it['label']}" for it in self.COICOP_ITEMS]
            self._label_embs = self._model.encode(labels, normalize_embeddings=True)
        q = self._normalize_txt(text)
        q_emb = self._model.encode([q], normalize_embeddings=True)
        sims = util.cos_sim(q_emb, self._label_embs).tolist()[0]
        ranked = sorted(
            [{"code": it["code"], "label": it["label"], "score": float(sims[i])}
             for i, it in enumerate(self.COICOP_ITEMS)],
            key=lambda x: x["score"], reverse=True
        )
        # topk is clamped to at least 1 so an accidental 0/negative still returns something.
        return {"candidates": ranked[:max(1, int(topk))]}
277
+
278
+
279
# ---- Resolve: local json import ----
class Resolve(Tool):
    """Fuses candidate lists from the other tools into one final choice plus alternatives."""

    name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
    inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str/dict) d'autres tools."},
              "topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
    output_type = "dict"

    def forward(self, json_lists, topn: int = 3):
        """Merge candidates per code: keep the max score, add +0.05 per extra vote.

        Accepts a mixed list of JSON strings and/or dicts, each shaped
        {"candidates": [{"code", "score", "why"/"label"}, ...]}.
        Returns {"final", "alternatives", "explanation"}; "final" is None when
        no candidate was found.
        """
        import json
        from typing import Dict, Any
        bucket: Dict[str, Dict[str, Any]] = {}
        for s in json_lists:
            data = s
            if isinstance(s, str):
                try: data = json.loads(s)
                except Exception: data = {}
            if not isinstance(data, dict): continue
            for c in data.get("candidates", []):
                # Robustness: skip malformed candidates instead of raising KeyError.
                if not isinstance(c, dict) or not c.get("code"):
                    continue
                code = c["code"]; score = float(c.get("score", 0.0))
                why = c.get("why", "") or c.get("label", "")
                if code not in bucket:
                    bucket[code] = {"code":code,"score":score,"votes":1,"evidences":[why] if why else []}
                else:
                    bucket[code]["score"] = max(bucket[code]["score"], score)
                    bucket[code]["votes"] += 1
                    if why: bucket[code]["evidences"].append(why)
        # Agreement bonus: each additional tool voting for a code adds 0.05.
        for v in bucket.values():
            v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
        ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
        if not ranked: return {"final": None, "alternatives": [], "explanation":"Aucun candidat"}
        final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
        exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
        return {"final": final, "alternatives": alts, "explanation": exp}
313
+
314
+
315
+
316
def build_agent(model_id: str | None = None) -> CodeAgent:
    """Assemble the CodeAgent wired with the full COICOP tool belt."""
    chosen_model = model_id or "Qwen/Qwen2.5-Coder-7B-Instruct"  # lightweight default for testing
    toolbox = [ValidateEANTool(), OFFByEAN(), RegexCOICOP(), OFFtoCOICOP(), SemSim(), Resolve()]
    return CodeAgent(
        tools=toolbox,
        model=InferenceClientModel(model_id=chosen_model),
        add_base_tools=False,
        max_steps=6,
        verbosity_level=2,
    )
326
+
327
def parse_result(res):
    """Coerce an agent result into structured data.

    Dicts pass through unchanged. Strings are parsed as JSON first — the task
    explicitly asks the agent for "un JSON final", and ast.literal_eval alone
    cannot read JSON's true/false/null — then as a Python literal. Anything
    unparseable is wrapped as {"raw": res}.
    """
    if isinstance(res, dict):
        return res
    try:
        return json.loads(res)
    except Exception:
        pass
    try:
        return ast.literal_eval(res)
    except Exception:
        return {"raw": res}
331
+
332
if __name__ == "__main__":
    # Demo run only — replace with real data when available.
    ean = "3256221112345"  # fictitious EAN (may not exist on OFF)
    label = "Camembert au lait cru AOP 250g - ALDI"

    agent = build_agent()
    # Prompt describing the 6-step pipeline the agent should follow (kept in
    # French: it is runtime input to the model, not a code comment).
    task = f"""
Classe ce produit en COICOP:
EAN: {ean}
Libellé: {label}
Pipeline:
1) validate_ean(ean)
2) openfoodfacts_product_by_ean(ean)  # si OFF ne trouve pas, on s'appuie sur regex + embeddings
3) map_off_to_coicop(product_name, categories_tags, ingredients_text)
4) coicop_regex_rules(text=libellé)
5) coicop_semantic_similarity(text=libellé, topk=5)
6) resolve_coicop_candidates([...], topn=3)
Attend un JSON final.
"""
    out = agent.run(task)
    print(parse_result(out))