Spaces:
Sleeping
Sleeping
Mise en forme
Browse files- app.py +4 -5
- quick_deploy_agent.py +78 -126
app.py
CHANGED
|
@@ -74,10 +74,9 @@ Pipeline :
|
|
| 74 |
6) resolve_coicop_candidates([...], topn=3)
|
| 75 |
|
| 76 |
Règles strictes :
|
| 77 |
-
-
|
| 78 |
-
-
|
| 79 |
-
|
| 80 |
-
- Retourne UNIQUEMENT un objet JSON valide, sans backticks, sans prose.
|
| 81 |
"""
|
| 82 |
|
| 83 |
|
|
@@ -166,7 +165,7 @@ theme = gr.themes.Soft(
|
|
| 166 |
|
| 167 |
custom_css = """
|
| 168 |
:root{
|
| 169 |
-
--insee-primary: #
|
| 170 |
--insee-primary-700:#0a357f;
|
| 171 |
--insee-accent: #ff5c35;
|
| 172 |
--insee-neutral-50:#f8fafc;
|
|
|
|
| 74 |
6) resolve_coicop_candidates([...], topn=3)
|
| 75 |
|
| 76 |
Règles strictes :
|
| 77 |
+
- Les outils renvoient des objets Python (dict/list). Tu peux indexer directement.
|
| 78 |
+
- Pour (4) et (5), utilise le libellé utilisateur (pas besoin de lire la réponse d’OFF).
|
| 79 |
+
- Retourne uniquement un JSON valide (objet), sans backticks.
|
|
|
|
| 80 |
"""
|
| 81 |
|
| 82 |
|
|
|
|
| 165 |
|
| 166 |
custom_css = """
|
| 167 |
:root{
|
| 168 |
+
--insee-primary: #89c2d9;
|
| 169 |
--insee-primary-700:#0a357f;
|
| 170 |
--insee-accent: #ff5c35;
|
| 171 |
--insee-neutral-50:#f8fafc;
|
quick_deploy_agent.py
CHANGED
|
@@ -39,19 +39,18 @@ def ean_check_digit_ok(ean: str) -> bool:
|
|
| 39 |
class ValidateEANTool(Tool):
|
| 40 |
name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
|
| 41 |
inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
|
| 42 |
-
output_type = "
|
| 43 |
|
| 44 |
-
def forward(self, ean: str)
|
| 45 |
-
import
|
| 46 |
digits = re.sub(r"\D", "", ean or "")
|
| 47 |
if len(digits) not in (8, 12, 13, 14):
|
| 48 |
-
return
|
| 49 |
total = 0
|
| 50 |
for i, ch in enumerate(reversed(digits[:-1]), start=1):
|
| 51 |
n = int(ch); total += n * (3 if i % 2 == 1 else 1)
|
| 52 |
check = (10 - (total % 10)) % 10
|
| 53 |
-
return
|
| 54 |
-
|
| 55 |
|
| 56 |
# ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
|
| 57 |
class OFFtoCOICOP(Tool):
|
|
@@ -134,50 +133,38 @@ class OFFByEAN(Tool):
|
|
| 134 |
name = "openfoodfacts_product_by_ean"
|
| 135 |
description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
|
| 136 |
inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
|
| 137 |
-
output_type = "
|
| 138 |
-
requirements = ["requests"]
|
| 139 |
|
| 140 |
-
def forward(self, ean: str)
|
| 141 |
-
import re,
|
| 142 |
from requests.adapters import HTTPAdapter
|
| 143 |
try:
|
| 144 |
from urllib3.util.retry import Retry
|
| 145 |
except Exception:
|
| 146 |
-
Retry = None
|
| 147 |
|
| 148 |
def _to_list(x):
|
| 149 |
-
if x is None:
|
| 150 |
-
|
| 151 |
-
if isinstance(x, list):
|
| 152 |
-
# stringifier proprement les éléments non-str
|
| 153 |
-
return [str(t).strip() for t in x if str(t).strip()]
|
| 154 |
if isinstance(x, str):
|
| 155 |
-
|
| 156 |
-
return parts
|
| 157 |
return [str(x).strip()]
|
| 158 |
|
| 159 |
def _first(*vals):
|
| 160 |
for v in vals:
|
| 161 |
-
if isinstance(v, str) and v.strip():
|
| 162 |
-
return v.strip()
|
| 163 |
return ""
|
| 164 |
|
| 165 |
code = re.sub(r"\D", "", ean or "")
|
| 166 |
if not code:
|
| 167 |
-
return
|
| 168 |
|
| 169 |
sess = requests.Session()
|
| 170 |
-
sess.headers.update({
|
| 171 |
-
"User-Agent": "insee-coicop-agent/1.0",
|
| 172 |
-
"Accept": "application/json",
|
| 173 |
-
})
|
| 174 |
if Retry:
|
| 175 |
-
retry = Retry(
|
| 176 |
-
|
| 177 |
-
status_forcelist=[429, 500, 502, 503, 504],
|
| 178 |
-
allowed_methods=frozenset(["GET"]),
|
| 179 |
-
raise_on_status=False,
|
| 180 |
-
)
|
| 181 |
sess.mount("https://", HTTPAdapter(max_retries=retry))
|
| 182 |
|
| 183 |
urls = [
|
|
@@ -201,44 +188,30 @@ class OFFByEAN(Tool):
|
|
| 201 |
status = data.get("status", 1 if product else 0)
|
| 202 |
if status == 1 or product:
|
| 203 |
p = product or {}
|
| 204 |
-
# Normalisation stricte des champs
|
| 205 |
product_name = _first(p.get("product_name_fr"), p.get("product_name"))
|
| 206 |
-
categories_tags = (
|
| 207 |
-
p.get("categories_tags_fr")
|
| 208 |
-
or p.get("categories_tags")
|
| 209 |
-
or p.get("categories")
|
| 210 |
-
)
|
| 211 |
categories_tags = _to_list(categories_tags)
|
| 212 |
ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
|
| 213 |
brands = _first(p.get("brands"), None)
|
| 214 |
stores = _first(p.get("stores"), None)
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
"
|
| 218 |
-
"status": status,
|
| 219 |
-
"status_verbose": data.get("status_verbose"),
|
| 220 |
-
"code": code,
|
| 221 |
-
"used_url": u,
|
| 222 |
-
# Champs lisibles directement par le LLM
|
| 223 |
"product_name": product_name,
|
| 224 |
-
"categories_tags": categories_tags,
|
| 225 |
"ingredients_text": ingredients_text,
|
| 226 |
-
"brands": brands,
|
| 227 |
-
"
|
| 228 |
-
"stores": stores,
|
| 229 |
-
"stores_list": _to_list(stores),
|
| 230 |
-
# 🔑 Bloc prêt pour l'étape 3
|
| 231 |
"step3_inputs": {
|
| 232 |
"product_name": product_name,
|
| 233 |
"categories_tags": categories_tags,
|
| 234 |
"ingredients_text": ingredients_text,
|
| 235 |
},
|
| 236 |
}
|
| 237 |
-
return json.dumps(out)
|
| 238 |
except Exception as e:
|
| 239 |
last_err = str(e)
|
| 240 |
|
| 241 |
-
return
|
| 242 |
|
| 243 |
|
| 244 |
|
|
@@ -246,9 +219,8 @@ class OFFByEAN(Tool):
|
|
| 246 |
class RegexCOICOP(Tool):
|
| 247 |
name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
|
| 248 |
inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
|
| 249 |
-
output_type = "
|
| 250 |
|
| 251 |
-
# précompile ici pour rester autonome
|
| 252 |
import re as _re
|
| 253 |
SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
|
| 254 |
PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
|
|
@@ -264,54 +236,73 @@ class RegexCOICOP(Tool):
|
|
| 264 |
s = re.sub(r"[^A-Z0-9% ]+", " ", s)
|
| 265 |
return re.sub(r"\s+", " ", s).strip()
|
| 266 |
|
| 267 |
-
def forward(self, text: str)
|
| 268 |
-
import
|
| 269 |
s = self._normalize_txt(text); c=[]
|
| 270 |
if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
|
| 271 |
if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
|
| 272 |
if self.GOAT.search(s): c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
|
| 273 |
if self.PROC.search(s): c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
|
| 274 |
if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
|
| 275 |
-
return
|
| 276 |
|
| 277 |
|
| 278 |
# ---- OFFtoCOICOP : normalisation locale + regex règles ----
|
| 279 |
class OFFtoCOICOP(Tool):
|
| 280 |
-
name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP."
|
| 281 |
inputs = {
|
| 282 |
"product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
|
| 283 |
"categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
|
| 284 |
"ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
|
|
|
|
| 285 |
}
|
| 286 |
-
output_type="
|
| 287 |
-
import re as _re
|
| 288 |
-
RULES = [
|
| 289 |
-
(_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
|
| 290 |
-
(_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
|
| 291 |
-
(_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
|
| 292 |
-
(_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
|
| 293 |
-
]
|
| 294 |
|
| 295 |
-
|
| 296 |
-
def _normalize_txt(s: str) -> str:
|
| 297 |
import unicodedata, re
|
| 298 |
if not s: return ""
|
| 299 |
s = s.upper()
|
| 300 |
s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
|
| 301 |
s = re.sub(r"[^A-Z0-9% ]+", " ", s)
|
| 302 |
return re.sub(r"\s+", " ", s).strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
def forward(self, product_name=None, categories_tags=None, ingredients_text=None) -> str:
|
| 305 |
-
import json
|
| 306 |
text = " ".join([t for t in [
|
| 307 |
self._normalize_txt(product_name or ""),
|
| 308 |
-
self._normalize_txt(" ".join(categories_tags
|
| 309 |
self._normalize_txt(ingredients_text or "")
|
| 310 |
] if t])
|
|
|
|
| 311 |
c=[]
|
| 312 |
for rx,(code,score,why) in self.RULES:
|
| 313 |
if rx.search(text): c.append({"code":code,"why":why,"score":score})
|
| 314 |
-
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
# ---- SemSim : COICOP embarqué + import lazy du modèle ----
|
|
@@ -319,11 +310,9 @@ class SemSim(Tool):
|
|
| 319 |
name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
|
| 320 |
inputs = {"text":{"type":"string","description":"Texte libellé"},
|
| 321 |
"topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
|
| 322 |
-
output_type = "
|
| 323 |
-
# packages nécessaires côté Hub
|
| 324 |
requirements = ["sentence_transformers", "torch"]
|
| 325 |
|
| 326 |
-
# mini référentiel embarqué pour l'export Hub
|
| 327 |
COICOP_ITEMS = [
|
| 328 |
{"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
|
| 329 |
{"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
|
|
@@ -344,10 +333,8 @@ class SemSim(Tool):
|
|
| 344 |
s = re.sub(r"[^A-Z0-9% ]+", " ", s)
|
| 345 |
return re.sub(r"\s+", " ", s).strip()
|
| 346 |
|
| 347 |
-
def forward(self, text: str, topk: int = 5)
|
| 348 |
-
import json
|
| 349 |
from sentence_transformers import SentenceTransformer, util
|
| 350 |
-
# lazy init pour la compat hub
|
| 351 |
if not hasattr(self, "_model"):
|
| 352 |
self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 353 |
q = self._normalize_txt(text)
|
|
@@ -360,22 +347,27 @@ class SemSim(Tool):
|
|
| 360 |
for i in range(len(self.COICOP_ITEMS))],
|
| 361 |
key=lambda x: x["score"], reverse=True
|
| 362 |
)
|
| 363 |
-
return
|
| 364 |
|
| 365 |
|
| 366 |
# ---- Resolve : import local json ----
|
| 367 |
class Resolve(Tool):
|
| 368 |
name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
|
| 369 |
-
inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str) d'autres tools."},
|
| 370 |
"topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
|
| 371 |
-
output_type = "
|
| 372 |
|
| 373 |
-
def forward(self, json_lists, topn: int = 3)
|
|
|
|
| 374 |
import json
|
| 375 |
from typing import Dict, Any
|
| 376 |
bucket: Dict[str, Dict[str, Any]] = {}
|
| 377 |
for s in json_lists:
|
| 378 |
-
data =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
for c in data.get("candidates", []):
|
| 380 |
code = c["code"]; score = float(c.get("score", 0.0))
|
| 381 |
why = c.get("why", "") or c.get("label", "")
|
|
@@ -388,47 +380,7 @@ class Resolve(Tool):
|
|
| 388 |
for v in bucket.values():
|
| 389 |
v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
|
| 390 |
ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
|
| 391 |
-
if not ranked: return
|
| 392 |
final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
|
| 393 |
exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
|
| 394 |
-
return
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
def build_agent(model_id: str | None = None) -> CodeAgent:
    """Assemble the COICOP classification agent.

    Args:
        model_id: Model identifier handed to ``InferenceClientModel``. When it
            is ``None`` or empty, "Qwen/Qwen2.5-Coder-7B-Instruct" is used.

    Returns:
        A ``CodeAgent`` holding one instance of each of the six pipeline tools,
        created with ``add_base_tools=False``, ``max_steps=6`` and
        ``verbosity_level=2``.
    """
    if not model_id:
        # Lightweight default model, meant for testing.
        model_id = "Qwen/Qwen2.5-Coder-7B-Instruct"

    # Tools are listed in the order in which the pipeline prompt calls them.
    pipeline_tools = [
        ValidateEANTool(),
        OFFByEAN(),
        RegexCOICOP(),
        OFFtoCOICOP(),
        SemSim(),
        Resolve(),
    ]
    llm = InferenceClientModel(model_id=model_id)

    return CodeAgent(
        tools=pipeline_tools,
        model=llm,
        add_base_tools=False,
        max_steps=6,
        verbosity_level=2,
    )
|
| 408 |
-
|
| 409 |
-
def parse_result(res):
    """Turn the agent's final answer into a Python object.

    Args:
        res: Value returned by the agent run; a dict, or text holding a Python
            literal or a JSON document.

    Returns:
        ``res`` itself when it is already a dict; otherwise the value parsed
        from it; otherwise ``{"raw": res}`` when it cannot be parsed.
    """
    import ast
    import json

    if isinstance(res, dict):
        return res

    # Python-literal syntax is tried first so that every input that used to
    # parse keeps producing exactly the same value.
    try:
        return ast.literal_eval(res)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        pass

    # JSON spells its constants true/false/null, which literal_eval rejects;
    # without this fallback a valid JSON answer would come back as {"raw": ...}.
    if isinstance(res, (str, bytes, bytearray)):
        try:
            return json.loads(res)
        except ValueError:  # JSONDecodeError and UnicodeDecodeError both derive from it
            pass

    return {"raw": res}
|
| 413 |
-
|
| 414 |
-
if __name__ == "__main__":
    # Test-only sample values; replace with real data where possible.
    # Fictitious EAN (it may not exist on Open Food Facts).
    ean = "3256221112345"
    label = "Camembert au lait cru AOP 250g - ALDI"

    # Prompt sent to the agent: the product to classify and the six-step pipeline.
    task = f"""
Classe ce produit en COICOP:
EAN: {ean}
Libellé: {label}
Pipeline:
1) validate_ean(ean)
2) openfoodfacts_product_by_ean(ean) # si OFF ne trouve pas, on s'appuie sur regex + embeddings
3) map_off_to_coicop(product_name, categories_tags, ingredients_text)
4) coicop_regex_rules(text=libellé)
5) coicop_semantic_similarity(text=libellé, topk=5)
6) resolve_coicop_candidates([...], topn=3)
Attend un JSON final.
"""

    agent = build_agent()
    print(parse_result(agent.run(task)))
|
|
|
|
| 39 |
class ValidateEANTool(Tool):
    """Tool that checks the trailing check digit of an EAN/GTIN code."""

    name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
    inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
    # "object" is the type name for dict results; "dict" is not a type name
    # accepted by the tool framework's attribute validation.
    output_type = "object"

    def forward(self, ean: str):
        """Validate ``ean`` against its last digit.

        Args:
            ean: The code to check. Every non-digit character is ignored.
                Non-string values (e.g. an int written by agent-generated
                code) are converted with ``str`` first.

        Returns:
            ``{"valid": bool, "normalized": str}`` where ``normalized`` is the
            digits-only form of the input. ``valid`` is False when the number
            of digits is not 8, 12, 13 or 14, or when the check digit differs.
        """
        import re

        # str() keeps a numeric EAN from raising TypeError inside re.sub.
        digits = re.sub(r"\D", "", str(ean or ""))
        if len(digits) not in (8, 12, 13, 14):
            return {"valid": False, "normalized": digits}

        # Walking leftwards from the digit next to the check digit, the
        # weights alternate 3, 1, 3, 1, ...
        total = sum(
            int(ch) * (3 if pos % 2 == 1 else 1)
            for pos, ch in enumerate(reversed(digits[:-1]), start=1)
        )
        check = (10 - (total % 10)) % 10
        return {"valid": check == int(digits[-1]), "normalized": digits}
|
|
|
|
| 54 |
|
| 55 |
# ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
|
| 56 |
class OFFtoCOICOP(Tool):
|
|
|
|
| 133 |
name = "openfoodfacts_product_by_ean"
|
| 134 |
description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
|
| 135 |
inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
|
| 136 |
+
output_type = "dict"
|
| 137 |
+
requirements = ["requests"]
|
| 138 |
|
| 139 |
+
def forward(self, ean: str):
|
| 140 |
+
import re, requests
|
| 141 |
from requests.adapters import HTTPAdapter
|
| 142 |
try:
|
| 143 |
from urllib3.util.retry import Retry
|
| 144 |
except Exception:
|
| 145 |
+
Retry = None
|
| 146 |
|
| 147 |
def _to_list(x):
|
| 148 |
+
if x is None: return []
|
| 149 |
+
if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
|
|
|
|
|
|
|
|
|
|
| 150 |
if isinstance(x, str):
|
| 151 |
+
return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
|
|
|
|
| 152 |
return [str(x).strip()]
|
| 153 |
|
| 154 |
def _first(*vals):
|
| 155 |
for v in vals:
|
| 156 |
+
if isinstance(v, str) and v.strip(): return v.strip()
|
|
|
|
| 157 |
return ""
|
| 158 |
|
| 159 |
code = re.sub(r"\D", "", ean or "")
|
| 160 |
if not code:
|
| 161 |
+
return {"ok": False, "status": 0, "code": "", "error": "EAN vide"}
|
| 162 |
|
| 163 |
sess = requests.Session()
|
| 164 |
+
sess.headers.update({"User-Agent":"insee-coicop-agent/1.0","Accept":"application/json"})
|
|
|
|
|
|
|
|
|
|
| 165 |
if Retry:
|
| 166 |
+
retry = Retry(total=3, backoff_factor=0.5, status_forcelist=[429,500,502,503,504],
|
| 167 |
+
allowed_methods=frozenset(["GET"]), raise_on_status=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
sess.mount("https://", HTTPAdapter(max_retries=retry))
|
| 169 |
|
| 170 |
urls = [
|
|
|
|
| 188 |
status = data.get("status", 1 if product else 0)
|
| 189 |
if status == 1 or product:
|
| 190 |
p = product or {}
|
|
|
|
| 191 |
product_name = _first(p.get("product_name_fr"), p.get("product_name"))
|
| 192 |
+
categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
categories_tags = _to_list(categories_tags)
|
| 194 |
ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
|
| 195 |
brands = _first(p.get("brands"), None)
|
| 196 |
stores = _first(p.get("stores"), None)
|
| 197 |
+
return {
|
| 198 |
+
"ok": True, "status": status, "status_verbose": data.get("status_verbose"),
|
| 199 |
+
"code": code, "used_url": u,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
"product_name": product_name,
|
| 201 |
+
"categories_tags": categories_tags,
|
| 202 |
"ingredients_text": ingredients_text,
|
| 203 |
+
"brands": brands, "brands_list": _to_list(brands),
|
| 204 |
+
"stores": stores, "stores_list": _to_list(stores),
|
|
|
|
|
|
|
|
|
|
| 205 |
"step3_inputs": {
|
| 206 |
"product_name": product_name,
|
| 207 |
"categories_tags": categories_tags,
|
| 208 |
"ingredients_text": ingredients_text,
|
| 209 |
},
|
| 210 |
}
|
|
|
|
| 211 |
except Exception as e:
|
| 212 |
last_err = str(e)
|
| 213 |
|
| 214 |
+
return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
|
| 215 |
|
| 216 |
|
| 217 |
|
|
|
|
| 219 |
class RegexCOICOP(Tool):
|
| 220 |
name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
|
| 221 |
inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
|
| 222 |
+
output_type = "dict"
|
| 223 |
|
|
|
|
| 224 |
import re as _re
|
| 225 |
SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
|
| 226 |
PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
|
|
|
|
| 236 |
s = re.sub(r"[^A-Z0-9% ]+", " ", s)
|
| 237 |
return re.sub(r"\s+", " ", s).strip()
|
| 238 |
|
| 239 |
+
def forward(self, text: str):
|
| 240 |
+
import re
|
| 241 |
s = self._normalize_txt(text); c=[]
|
| 242 |
if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
|
| 243 |
if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
|
| 244 |
if self.GOAT.search(s): c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
|
| 245 |
if self.PROC.search(s): c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
|
| 246 |
if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
|
| 247 |
+
return {"candidates": c}
|
| 248 |
|
| 249 |
|
| 250 |
# ---- OFFtoCOICOP : normalisation locale + regex règles ----
|
| 251 |
class OFFtoCOICOP(Tool):
    """Tool that maps Open Food Facts product fields to COICOP candidates.

    The product name, category tags and ingredients text are normalised,
    joined into one string and matched against ``RULES``. The three fields
    can be passed separately, or taken from ``off_payload`` (the result of
    the Open Food Facts lookup step) when none of them is provided.
    """

    name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP (off_payload ou champs séparés)."
    inputs = {
        "product_name": {"type": "string", "description": "Nom produit OFF (fr/en).", "nullable": True},
        "categories_tags": {"type": "array", "description": "Liste OFF categories_tags.", "nullable": True},
        "ingredients_text": {"type": "string", "description": "Texte ingrédients.", "nullable": True},
        # "any": the lookup step now returns a dict, but a raw JSON string is still accepted.
        "off_payload": {"type": "any", "description": "Résultat de l'étape 2 : dict, ou chaîne JSON brute.", "nullable": True},
    }
    # "object" is the type name for dict results; "dict" is not a type name
    # accepted by the tool framework's attribute validation.
    output_type = "object"

    # Imported in the class body and reached through ``self`` so the class does
    # not rely on module-level imports (presumably for Hub export - confirm).
    import re as _re
    import json as _json
    import ast as _ast

    # (compiled pattern, (COICOP code, score, reason)). Patterns are applied to
    # text that has already been upper-cased and stripped of accents, so the
    # accented alternatives below can never match; they are kept unchanged.
    RULES = [
        (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2", 0.95, "OFF: pâte molle/persillée")),
        (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3", 0.90, "OFF: pâte pressée")),
        (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4", 0.90, "OFF: chèvre")),
        (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5", 0.85, "OFF: fondu/rapé/portions")),
    ]

    def _normalize_txt(self, s: str) -> str:
        """Upper-case ``s``, strip accents, keep only A-Z, 0-9, % and single spaces."""
        import re
        import unicodedata

        if not s:
            return ""
        # str() guards against a non-string field coming from a payload.
        s = str(s).upper()
        # NFD splits accented letters into base letter + combining mark ("Mn"),
        # and the marks are then dropped.
        s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
        s = re.sub(r"[^A-Z0-9% ]+", " ", s)
        return re.sub(r"\s+", " ", s).strip()

    def _to_list(self, x):
        """Return ``x`` as a list of non-empty, stripped strings.

        ``None`` gives ``[]``; a list is stringified element by element; a
        string is split on ``,``, ``|`` and ``;``; anything else becomes a
        one-element list.
        """
        import re

        if x is None:
            return []
        if isinstance(x, list):
            return [str(t).strip() for t in x if str(t).strip()]
        if isinstance(x, str):
            return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
        return [str(x).strip()]

    def _safe_parse(self, s):
        """Return ``s`` as a dict, or ``{}`` when that is not possible.

        A dict is returned unchanged. Text is parsed as JSON first, then as a
        Python literal. Any result that is not a dict is replaced by ``{}`` so
        callers can always use ``.get``.
        """
        if isinstance(s, dict):
            return s
        try:
            parsed = self._json.loads(s)
        except (TypeError, ValueError):
            try:
                parsed = self._ast.literal_eval(s)
            except (TypeError, ValueError, SyntaxError, MemoryError, RecursionError):
                return {}
        return parsed if isinstance(parsed, dict) else {}

    def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None):
        """Return the COICOP candidates whose rule matches the product text.

        Args:
            product_name: Product name.
            categories_tags: Category tags, as a list or a delimited string.
            ingredients_text: Ingredients text.
            off_payload: Result of the lookup step (dict or JSON/literal
                string). Only read when the three fields above are all empty.

        Returns:
            ``{"candidates": [...]}`` with one ``{"code", "why", "score"}``
            entry per matching rule, in ``RULES`` order.
        """
        if off_payload and not (product_name or categories_tags or ingredients_text):
            data = self._safe_parse(off_payload)
            product_name = data.get("product_name") or ""
            categories_tags = self._to_list(data.get("categories_tags"))
            ingredients_text = data.get("ingredients_text") or ""

        normalized_parts = [
            self._normalize_txt(product_name or ""),
            self._normalize_txt(" ".join(self._to_list(categories_tags))),
            self._normalize_txt(ingredients_text or ""),
        ]
        text = " ".join(part for part in normalized_parts if part)

        candidates = [
            {"code": code, "why": why, "score": score}
            for rx, (code, score, why) in self.RULES
            if rx.search(text)
        ]
        return {"candidates": candidates}
|
| 306 |
|
| 307 |
|
| 308 |
# ---- SemSim : COICOP embarqué + import lazy du modèle ----
|
|
|
|
| 310 |
name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
|
| 311 |
inputs = {"text":{"type":"string","description":"Texte libellé"},
|
| 312 |
"topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
|
| 313 |
+
output_type = "dict"
|
|
|
|
| 314 |
requirements = ["sentence_transformers", "torch"]
|
| 315 |
|
|
|
|
| 316 |
COICOP_ITEMS = [
|
| 317 |
{"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
|
| 318 |
{"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
|
|
|
|
| 333 |
s = re.sub(r"[^A-Z0-9% ]+", " ", s)
|
| 334 |
return re.sub(r"\s+", " ", s).strip()
|
| 335 |
|
| 336 |
+
def forward(self, text: str, topk: int = 5):
|
|
|
|
| 337 |
from sentence_transformers import SentenceTransformer, util
|
|
|
|
| 338 |
if not hasattr(self, "_model"):
|
| 339 |
self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
| 340 |
q = self._normalize_txt(text)
|
|
|
|
| 347 |
for i in range(len(self.COICOP_ITEMS))],
|
| 348 |
key=lambda x: x["score"], reverse=True
|
| 349 |
)
|
| 350 |
+
return {"candidates": ranked[:max(1,int(topk))]}
|
| 351 |
|
| 352 |
|
| 353 |
# ---- Resolve : import local json ----
|
| 354 |
class Resolve(Tool):
|
| 355 |
name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
|
| 356 |
+
inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str/dict) d'autres tools."},
|
| 357 |
"topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
|
| 358 |
+
output_type = "dict"
|
| 359 |
|
| 360 |
+
def forward(self, json_lists, topn: int = 3):
|
| 361 |
+
# accepter listes de strings JSON OU de dicts
|
| 362 |
import json
|
| 363 |
from typing import Dict, Any
|
| 364 |
bucket: Dict[str, Dict[str, Any]] = {}
|
| 365 |
for s in json_lists:
|
| 366 |
+
data = s
|
| 367 |
+
if isinstance(s, str):
|
| 368 |
+
try: data = json.loads(s)
|
| 369 |
+
except Exception: data = {}
|
| 370 |
+
if not isinstance(data, dict): continue
|
| 371 |
for c in data.get("candidates", []):
|
| 372 |
code = c["code"]; score = float(c.get("score", 0.0))
|
| 373 |
why = c.get("why", "") or c.get("label", "")
|
|
|
|
| 380 |
for v in bucket.values():
|
| 381 |
v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
|
| 382 |
ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
|
| 383 |
+
if not ranked: return {"final": None, "alternatives": [], "explanation":"Aucun candidat"}
|
| 384 |
final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
|
| 385 |
exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
|
| 386 |
+
return {"final": final, "alternatives": alts, "explanation": exp}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|