Marylene committed on
Commit
958ec1d
·
verified ·
1 Parent(s): 9ff51f8

Mise en forme

Browse files
Files changed (2) hide show
  1. app.py +4 -5
  2. quick_deploy_agent.py +78 -126
app.py CHANGED
@@ -74,10 +74,9 @@ Pipeline :
74
  6) resolve_coicop_candidates([...], topn=3)
75
 
76
  Règles strictes :
77
- - Pour (4) et (5), UTILISE EXCLUSIVEMENT le libellé fourni par l'utilisateur (ne lis pas la réponse OFF).
78
- - Si le libellé est vide, effectue (4) et (5) avec une chaîne vide (pas d'indexation JSON).
79
- Sortie :
80
- - Retourne UNIQUEMENT un objet JSON valide, sans backticks, sans prose.
81
  """
82
 
83
 
@@ -166,7 +165,7 @@ theme = gr.themes.Soft(
166
 
167
  custom_css = """
168
  :root{
169
- --insee-primary: #0b3d91;
170
  --insee-primary-700:#0a357f;
171
  --insee-accent: #ff5c35;
172
  --insee-neutral-50:#f8fafc;
 
74
  6) resolve_coicop_candidates([...], topn=3)
75
 
76
  Règles strictes :
77
+ - Les outils renvoient des objets Python (dict/list). Tu peux indexer directement.
78
+ - Pour (4) et (5), utilise le libellé utilisateur (pas besoin de lire la réponse d’OFF).
79
+ - Retourne uniquement un JSON valide (objet), sans backticks.
 
80
  """
81
 
82
 
 
165
 
166
  custom_css = """
167
  :root{
168
+ --insee-primary: #89c2d9;
169
  --insee-primary-700:#0a357f;
170
  --insee-accent: #ff5c35;
171
  --insee-neutral-50:#f8fafc;
quick_deploy_agent.py CHANGED
@@ -39,19 +39,18 @@ def ean_check_digit_ok(ean: str) -> bool:
39
  class ValidateEANTool(Tool):
40
  name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
41
  inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
42
- output_type = "string"
43
 
44
- def forward(self, ean: str) -> str:
45
- import json, re
46
  digits = re.sub(r"\D", "", ean or "")
47
  if len(digits) not in (8, 12, 13, 14):
48
- return json.dumps({"valid": False, "normalized": digits})
49
  total = 0
50
  for i, ch in enumerate(reversed(digits[:-1]), start=1):
51
  n = int(ch); total += n * (3 if i % 2 == 1 else 1)
52
  check = (10 - (total % 10)) % 10
53
- return json.dumps({"valid": check == int(digits[-1]), "normalized": digits})
54
-
55
 
56
  # ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
57
  class OFFtoCOICOP(Tool):
@@ -134,50 +133,38 @@ class OFFByEAN(Tool):
134
  name = "openfoodfacts_product_by_ean"
135
  description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
136
  inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
137
- output_type = "string"
138
- requirements = ["requests"] # urllib3 vient via requests
139
 
140
- def forward(self, ean: str) -> str:
141
- import re, json, requests
142
  from requests.adapters import HTTPAdapter
143
  try:
144
  from urllib3.util.retry import Retry
145
  except Exception:
146
- Retry = None # environnement minimal
147
 
148
  def _to_list(x):
149
- if x is None:
150
- return []
151
- if isinstance(x, list):
152
- # stringifier proprement les éléments non-str
153
- return [str(t).strip() for t in x if str(t).strip()]
154
  if isinstance(x, str):
155
- parts = [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
156
- return parts
157
  return [str(x).strip()]
158
 
159
  def _first(*vals):
160
  for v in vals:
161
- if isinstance(v, str) and v.strip():
162
- return v.strip()
163
  return ""
164
 
165
  code = re.sub(r"\D", "", ean or "")
166
  if not code:
167
- return json.dumps({"ok": False, "status": 0, "code": "", "error": "EAN vide"})
168
 
169
  sess = requests.Session()
170
- sess.headers.update({
171
- "User-Agent": "insee-coicop-agent/1.0",
172
- "Accept": "application/json",
173
- })
174
  if Retry:
175
- retry = Retry(
176
- total=3, backoff_factor=0.5,
177
- status_forcelist=[429, 500, 502, 503, 504],
178
- allowed_methods=frozenset(["GET"]),
179
- raise_on_status=False,
180
- )
181
  sess.mount("https://", HTTPAdapter(max_retries=retry))
182
 
183
  urls = [
@@ -201,44 +188,30 @@ class OFFByEAN(Tool):
201
  status = data.get("status", 1 if product else 0)
202
  if status == 1 or product:
203
  p = product or {}
204
- # Normalisation stricte des champs
205
  product_name = _first(p.get("product_name_fr"), p.get("product_name"))
206
- categories_tags = (
207
- p.get("categories_tags_fr")
208
- or p.get("categories_tags")
209
- or p.get("categories")
210
- )
211
  categories_tags = _to_list(categories_tags)
212
  ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
213
  brands = _first(p.get("brands"), None)
214
  stores = _first(p.get("stores"), None)
215
-
216
- out = {
217
- "ok": True,
218
- "status": status,
219
- "status_verbose": data.get("status_verbose"),
220
- "code": code,
221
- "used_url": u,
222
- # Champs lisibles directement par le LLM
223
  "product_name": product_name,
224
- "categories_tags": categories_tags, # toujours list[str]
225
  "ingredients_text": ingredients_text,
226
- "brands": brands, # string (OFF est souvent "brand1, brand2")
227
- "brands_list": _to_list(brands), # list[str] pratique
228
- "stores": stores,
229
- "stores_list": _to_list(stores),
230
- # 🔑 Bloc prêt pour l'étape 3
231
  "step3_inputs": {
232
  "product_name": product_name,
233
  "categories_tags": categories_tags,
234
  "ingredients_text": ingredients_text,
235
  },
236
  }
237
- return json.dumps(out)
238
  except Exception as e:
239
  last_err = str(e)
240
 
241
- return json.dumps({"ok": False, "status": 0, "code": code, "error": last_err or "not found"})
242
 
243
 
244
 
@@ -246,9 +219,8 @@ class OFFByEAN(Tool):
246
  class RegexCOICOP(Tool):
247
  name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
248
  inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
249
- output_type = "string"
250
 
251
- # précompile ici pour rester autonome
252
  import re as _re
253
  SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
254
  PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
@@ -264,54 +236,73 @@ class RegexCOICOP(Tool):
264
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
265
  return re.sub(r"\s+", " ", s).strip()
266
 
267
- def forward(self, text: str) -> str:
268
- import json, re
269
  s = self._normalize_txt(text); c=[]
270
  if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
271
  if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
272
  if self.GOAT.search(s): c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
273
  if self.PROC.search(s): c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
274
  if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
275
- return json.dumps({"candidates": c})
276
 
277
 
278
  # ---- OFFtoCOICOP : normalisation locale + regex règles ----
279
  class OFFtoCOICOP(Tool):
280
- name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP."
281
  inputs = {
282
  "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
283
  "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
284
  "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
 
285
  }
286
- output_type="string"
287
- import re as _re
288
- RULES = [
289
- (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
290
- (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
291
- (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
292
- (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
293
- ]
294
 
295
- @staticmethod
296
- def _normalize_txt(s: str) -> str:
297
  import unicodedata, re
298
  if not s: return ""
299
  s = s.upper()
300
  s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
301
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
302
  return re.sub(r"\s+", " ", s).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- def forward(self, product_name=None, categories_tags=None, ingredients_text=None) -> str:
305
- import json
306
  text = " ".join([t for t in [
307
  self._normalize_txt(product_name or ""),
308
- self._normalize_txt(" ".join(categories_tags or [])),
309
  self._normalize_txt(ingredients_text or "")
310
  ] if t])
 
311
  c=[]
312
  for rx,(code,score,why) in self.RULES:
313
  if rx.search(text): c.append({"code":code,"why":why,"score":score})
314
- return json.dumps({"candidates": c})
 
315
 
316
 
317
  # ---- SemSim : COICOP embarqué + import lazy du modèle ----
@@ -319,11 +310,9 @@ class SemSim(Tool):
319
  name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
320
  inputs = {"text":{"type":"string","description":"Texte libellé"},
321
  "topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
322
- output_type = "string"
323
- # packages nécessaires côté Hub
324
  requirements = ["sentence_transformers", "torch"]
325
 
326
- # mini référentiel embarqué pour l'export Hub
327
  COICOP_ITEMS = [
328
  {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
329
  {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
@@ -344,10 +333,8 @@ class SemSim(Tool):
344
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
345
  return re.sub(r"\s+", " ", s).strip()
346
 
347
- def forward(self, text: str, topk: int = 5) -> str:
348
- import json
349
  from sentence_transformers import SentenceTransformer, util
350
- # lazy init pour la compat hub
351
  if not hasattr(self, "_model"):
352
  self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
353
  q = self._normalize_txt(text)
@@ -360,22 +347,27 @@ class SemSim(Tool):
360
  for i in range(len(self.COICOP_ITEMS))],
361
  key=lambda x: x["score"], reverse=True
362
  )
363
- return json.dumps({"candidates": ranked[:max(1,int(topk))]})
364
 
365
 
366
  # ---- Resolve : import local json ----
367
  class Resolve(Tool):
368
  name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
369
- inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str) d'autres tools."},
370
  "topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
371
- output_type = "string"
372
 
373
- def forward(self, json_lists, topn: int = 3) -> str:
 
374
  import json
375
  from typing import Dict, Any
376
  bucket: Dict[str, Dict[str, Any]] = {}
377
  for s in json_lists:
378
- data = json.loads(s) if s else {}
 
 
 
 
379
  for c in data.get("candidates", []):
380
  code = c["code"]; score = float(c.get("score", 0.0))
381
  why = c.get("why", "") or c.get("label", "")
@@ -388,47 +380,7 @@ class Resolve(Tool):
388
  for v in bucket.values():
389
  v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
390
  ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
391
- if not ranked: return json.dumps({"final": None, "alternatives": [], "explanation":"Aucun candidat"})
392
  final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
393
  exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
394
- return json.dumps({"final": final, "alternatives": alts, "explanation": exp})
395
-
396
-
397
-
398
- def build_agent(model_id: str | None = None) -> CodeAgent:
399
- model_id = model_id or "Qwen/Qwen2.5-Coder-7B-Instruct" # léger pour tester
400
- agent = CodeAgent(
401
- tools=[ValidateEANTool(), OFFByEAN(), RegexCOICOP(), OFFtoCOICOP(), SemSim(), Resolve()],
402
- model=InferenceClientModel(model_id=model_id),
403
- add_base_tools=False,
404
- max_steps=6,
405
- verbosity_level=2,
406
- )
407
- return agent
408
-
409
- def parse_result(res):
410
- if isinstance(res, dict): return res
411
- try: return ast.literal_eval(res)
412
- except Exception: return {"raw": res}
413
-
414
- if __name__ == "__main__":
415
- # Remplace par les vraies données si possible - uniquement du test
416
- ean = "3256221112345" # EAN fictif (peut ne pas exister sur OFF)
417
- label = "Camembert au lait cru AOP 250g - ALDI"
418
-
419
- agent = build_agent()
420
- task = f"""
421
- Classe ce produit en COICOP:
422
- EAN: {ean}
423
- Libellé: {label}
424
- Pipeline:
425
- 1) validate_ean(ean)
426
- 2) openfoodfacts_product_by_ean(ean) # si OFF ne trouve pas, on s'appuie sur regex + embeddings
427
- 3) map_off_to_coicop(product_name, categories_tags, ingredients_text)
428
- 4) coicop_regex_rules(text=libellé)
429
- 5) coicop_semantic_similarity(text=libellé, topk=5)
430
- 6) resolve_coicop_candidates([...], topn=3)
431
- Attend un JSON final.
432
- """
433
- out = agent.run(task)
434
- print(parse_result(out))
 
39
  class ValidateEANTool(Tool):
40
  name, description = "validate_ean", "Valide un EAN/GTIN (clé GS1)."
41
  inputs = {"ean": {"type": "string", "description": "Code EAN/GTIN (8/12/13/14 chiffres)."}}
42
+ output_type = "dict" # <— (facultatif)
43
 
44
+ def forward(self, ean: str):
45
+ import re
46
  digits = re.sub(r"\D", "", ean or "")
47
  if len(digits) not in (8, 12, 13, 14):
48
+ return {"valid": False, "normalized": digits}
49
  total = 0
50
  for i, ch in enumerate(reversed(digits[:-1]), start=1):
51
  n = int(ch); total += n * (3 if i % 2 == 1 else 1)
52
  check = (10 - (total % 10)) % 10
53
+ return {"valid": check == int(digits[-1]), "normalized": digits}
 
54
 
55
  # ---- OFFtoCOICOP : accepte off_payload (JSON brut) OU champs séparés ----
56
  class OFFtoCOICOP(Tool):
 
133
  name = "openfoodfacts_product_by_ean"
134
  description = "Open Food Facts /api/v0|v2/product/{ean} (name, brands, categories...)."
135
  inputs = {"ean": {"type": "string", "description": "EAN à interroger sur l'API OFF."}}
136
+ output_type = "dict"
137
+ requirements = ["requests"]
138
 
139
+ def forward(self, ean: str):
140
+ import re, requests
141
  from requests.adapters import HTTPAdapter
142
  try:
143
  from urllib3.util.retry import Retry
144
  except Exception:
145
+ Retry = None
146
 
147
  def _to_list(x):
148
+ if x is None: return []
149
+ if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
 
 
 
150
  if isinstance(x, str):
151
+ return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
 
152
  return [str(x).strip()]
153
 
154
  def _first(*vals):
155
  for v in vals:
156
+ if isinstance(v, str) and v.strip(): return v.strip()
 
157
  return ""
158
 
159
  code = re.sub(r"\D", "", ean or "")
160
  if not code:
161
+ return {"ok": False, "status": 0, "code": "", "error": "EAN vide"}
162
 
163
  sess = requests.Session()
164
+ sess.headers.update({"User-Agent":"insee-coicop-agent/1.0","Accept":"application/json"})
 
 
 
165
  if Retry:
166
+ retry = Retry(total=3, backoff_factor=0.5, status_forcelist=[429,500,502,503,504],
167
+ allowed_methods=frozenset(["GET"]), raise_on_status=False)
 
 
 
 
168
  sess.mount("https://", HTTPAdapter(max_retries=retry))
169
 
170
  urls = [
 
188
  status = data.get("status", 1 if product else 0)
189
  if status == 1 or product:
190
  p = product or {}
 
191
  product_name = _first(p.get("product_name_fr"), p.get("product_name"))
192
+ categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
 
 
 
 
193
  categories_tags = _to_list(categories_tags)
194
  ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
195
  brands = _first(p.get("brands"), None)
196
  stores = _first(p.get("stores"), None)
197
+ return {
198
+ "ok": True, "status": status, "status_verbose": data.get("status_verbose"),
199
+ "code": code, "used_url": u,
 
 
 
 
 
200
  "product_name": product_name,
201
+ "categories_tags": categories_tags,
202
  "ingredients_text": ingredients_text,
203
+ "brands": brands, "brands_list": _to_list(brands),
204
+ "stores": stores, "stores_list": _to_list(stores),
 
 
 
205
  "step3_inputs": {
206
  "product_name": product_name,
207
  "categories_tags": categories_tags,
208
  "ingredients_text": ingredients_text,
209
  },
210
  }
 
211
  except Exception as e:
212
  last_err = str(e)
213
 
214
+ return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
215
 
216
 
217
 
 
219
  class RegexCOICOP(Tool):
220
  name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
221
  inputs = {"text": {"type": "string", "description": "Libellé produit (texte libre) à analyser."}}
222
+ output_type = "dict"
223
 
 
224
  import re as _re
225
  SOFT = _re.compile(r"(?:\b|^)(?:CAMEMB(?:ERT)?|BRIE|COULOMMI(?:ERS?)?|BLEU|ROQUEFORT|GORGONZ(?:OLA)?|REBLOCHON|MUNSTER)(?:\b|$)")
226
  PRESS = _re.compile(r"(?:\b|^)(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)(?:\b|$)")
 
236
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
237
  return re.sub(r"\s+", " ", s).strip()
238
 
239
+ def forward(self, text: str):
240
+ import re
241
  s = self._normalize_txt(text); c=[]
242
  if self.SOFT.search(s): c.append({"code":"01.1.4.5.2","why":"pâte molle/persillée","score":0.95})
243
  if self.PRESS.search(s): c.append({"code":"01.1.4.5.3","why":"pâte pressée","score":0.90})
244
  if self.GOAT.search(s): c.append({"code":"01.1.4.5.4","why":"chèvre","score":0.90})
245
  if self.PROC.search(s): c.append({"code":"01.1.4.5.5","why":"fondu/râpé/portions","score":0.85})
246
  if not c and re.search(r"\bFROMAGE\b", s): c.append({"code":"01.1.4.5","why":"générique fromage/laits caillés","score":0.6})
247
+ return {"candidates": c}
248
 
249
 
250
  # ---- OFFtoCOICOP : normalisation locale + regex règles ----
251
  class OFFtoCOICOP(Tool):
252
+ name, description = "map_off_to_coicop", "Mappe catégories OFF vers COICOP (off_payload ou champs séparés)."
253
  inputs = {
254
  "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
255
  "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
256
  "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
257
+ "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
258
  }
259
+ output_type="dict"
 
 
 
 
 
 
 
260
 
261
+ import re as _re, json as _json, ast as _ast
262
+ def _normalize_txt(self, s: str) -> str:
263
  import unicodedata, re
264
  if not s: return ""
265
  s = s.upper()
266
  s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
267
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
268
  return re.sub(r"\s+", " ", s).strip()
269
+ def _to_list(self, x):
270
+ import re
271
+ if x is None: return []
272
+ if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
273
+ if isinstance(x, str): return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
274
+ return [str(x).strip()]
275
+ def _safe_parse(self, s):
276
+ try: return self._json.loads(s)
277
+ except Exception:
278
+ try: return self._ast.literal_eval(s)
279
+ except Exception: return {}
280
+
281
+ RULES = [
282
+ (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
283
+ (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
284
+ (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
285
+ (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.85,"OFF: fondu/rapé/portions")),
286
+ ]
287
+
288
+ def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None):
289
+ if off_payload and not (product_name or categories_tags or ingredients_text):
290
+ data = self._safe_parse(off_payload) or {}
291
+ product_name = data.get("product_name") or ""
292
+ categories_tags = self._to_list(data.get("categories_tags"))
293
+ ingredients_text= data.get("ingredients_text") or ""
294
 
 
 
295
  text = " ".join([t for t in [
296
  self._normalize_txt(product_name or ""),
297
+ self._normalize_txt(" ".join(self._to_list(categories_tags))),
298
  self._normalize_txt(ingredients_text or "")
299
  ] if t])
300
+
301
  c=[]
302
  for rx,(code,score,why) in self.RULES:
303
  if rx.search(text): c.append({"code":code,"why":why,"score":score})
304
+
305
+ return {"candidates": c}
306
 
307
 
308
  # ---- SemSim : COICOP embarqué + import lazy du modèle ----
 
310
  name, description = "coicop_semantic_similarity", "Embeddings → top-k COICOP."
311
  inputs = {"text":{"type":"string","description":"Texte libellé"},
312
  "topk":{"type":"integer","description":"Nombre de candidats (défaut 5)","nullable":True}}
313
+ output_type = "dict"
 
314
  requirements = ["sentence_transformers", "torch"]
315
 
 
316
  COICOP_ITEMS = [
317
  {"code": "01.1.4.5.1", "label": "Laits caillés, fromage blanc, petites crèmes fromagères"},
318
  {"code": "01.1.4.5.2", "label": "Fromage à pâte molle et à pâte persillée"},
 
333
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
334
  return re.sub(r"\s+", " ", s).strip()
335
 
336
+ def forward(self, text: str, topk: int = 5):
 
337
  from sentence_transformers import SentenceTransformer, util
 
338
  if not hasattr(self, "_model"):
339
  self._model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
340
  q = self._normalize_txt(text)
 
347
  for i in range(len(self.COICOP_ITEMS))],
348
  key=lambda x: x["score"], reverse=True
349
  )
350
+ return {"candidates": ranked[:max(1,int(topk))]}
351
 
352
 
353
  # ---- Resolve : import local json ----
354
  class Resolve(Tool):
355
  name, description = "resolve_coicop_candidates", "Fusionne candidats → choix final + alternatives + explication."
356
+ inputs = {"json_lists": {"type":"array","description":"Liste de JSON (str/dict) d'autres tools."},
357
  "topn":{"type":"integer","description":"Nb d'alternatives (défaut 3)","nullable":True}}
358
+ output_type = "dict"
359
 
360
+ def forward(self, json_lists, topn: int = 3):
361
+ # accepter listes de strings JSON OU de dicts
362
  import json
363
  from typing import Dict, Any
364
  bucket: Dict[str, Dict[str, Any]] = {}
365
  for s in json_lists:
366
+ data = s
367
+ if isinstance(s, str):
368
+ try: data = json.loads(s)
369
+ except Exception: data = {}
370
+ if not isinstance(data, dict): continue
371
  for c in data.get("candidates", []):
372
  code = c["code"]; score = float(c.get("score", 0.0))
373
  why = c.get("why", "") or c.get("label", "")
 
380
  for v in bucket.values():
381
  v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
382
  ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
383
+ if not ranked: return {"final": None, "alternatives": [], "explanation":"Aucun candidat"}
384
  final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
385
  exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
386
+ return {"final": final, "alternatives": alts, "explanation": exp}