Marylene committed on
Commit
5823e9e
·
verified ·
1 Parent(s): d768331

meilleure lecture des ingrédients + nombreecho renvoyés

Browse files
Files changed (1) hide show
  1. quick_deploy_agent.py +146 -30
quick_deploy_agent.py CHANGED
@@ -59,7 +59,7 @@ class OFFByEAN(Tool):
59
  requirements = ["requests"]
60
 
61
  def forward(self, ean: str):
62
- import re
63
  from requests.adapters import HTTPAdapter
64
  try:
65
  from urllib3.util.retry import Retry
@@ -92,8 +92,9 @@ class OFFByEAN(Tool):
92
  urls = [
93
  f"https://world.openfoodfacts.org/api/v0/product/{code}.json",
94
  "https://world.openfoodfacts.org/api/v2/product/"
95
- f"{code}?lc=fr&fields=code,product_name,product_name_fr,brands,"
96
- "categories_tags,categories_tags_fr,ingredients_text,ingredients_text_fr,"
 
97
  "stores,status,status_verbose",
98
  f"https://world.openfoodfacts.net/api/v0/product/{code}.json",
99
  ]
@@ -111,23 +112,51 @@ class OFFByEAN(Tool):
111
  if status == 1 or product:
112
  p = product or {}
113
  product_name = _first(p.get("product_name_fr"), p.get("product_name"))
 
114
  categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
115
  categories_tags = _to_list(categories_tags)
116
- ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  brands = _first(p.get("brands"), None)
118
  stores = _first(p.get("stores"), None)
 
119
  return {
120
  "ok": True, "status": status, "status_verbose": data.get("status_verbose"),
121
  "code": code, "used_url": u,
122
  "product_name": product_name,
123
  "categories_tags": categories_tags,
 
124
  "ingredients_text": ingredients_text,
 
 
 
 
 
 
125
  "brands": brands, "brands_list": _to_list(brands),
126
  "stores": stores, "stores_list": _to_list(stores),
 
127
  "step3_inputs": {
128
  "product_name": product_name,
129
  "categories_tags": categories_tags,
130
  "ingredients_text": ingredients_text,
 
 
 
131
  },
132
  }
133
  except Exception as e:
@@ -135,6 +164,7 @@ class OFFByEAN(Tool):
135
 
136
  return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
137
 
 
138
  # ---- RegexCOICOP ----
139
  class RegexCOICOP(Tool):
140
  name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
@@ -174,6 +204,9 @@ class OFFtoCOICOP(Tool):
174
  "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
175
  "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
176
  "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
 
 
 
177
  "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
178
  }
179
  output_type="object"
@@ -186,47 +219,119 @@ class OFFtoCOICOP(Tool):
186
  s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
187
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
188
  return re.sub(r"\s+", " ", s).strip()
 
189
  def _to_list(self, x):
190
  import re
191
  if x is None: return []
192
  if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
193
  if isinstance(x, str): return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
194
  return [str(x).strip()]
 
195
  def _safe_parse(self, s):
196
  try: return self._json.loads(s)
197
  except Exception:
198
  try: return self._ast.literal_eval(s)
199
  except Exception: return {}
200
 
201
- RULES = [
202
- (_re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b"), ("01.1.4.5.2",0.95,"OFF: pâte molle/persillée")),
203
- (_re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b"), ("01.1.4.5.3",0.90,"OFF: pâte pressée")),
204
- (_re.compile(r"\b(CHEVRE|STE MAURE|CROTTIN|BUCHE|PICODON|PELARDON|BANON)\b"), ("01.1.4.5.4",0.90,"OFF: chèvre")),
205
- (_re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS)\b"), ("01.1.4.5.5",0.88,"OFF: fondu/rapé/portions")),
206
- (_re.compile(r"\b(CHEESE[ -]?SPREADS?|FROMAGE[S]?\s+FONDUS?)\b"), ("01.1.4.5.5",0.92,"OFF: cheese spreads / fromage fondu")),
207
- (_re.compile(r"\b(PROCESSED[ -]?CHEESE|TOASTINETTES?)\b"), ("01.1.4.5.5",0.90,"OFF: processed cheese")),
208
- (_re.compile(r"\b(FROMAGE\s+BLANC|PETITES?\s+CREMES?\s+FROMAGERES?)\b"), ("01.1.4.5.1",0.85,"OFF: fromage blanc / crèmes fromagères")),
209
- ]
210
-
211
- def forward(self, product_name=None, categories_tags=None, ingredients_text=None, off_payload=None):
212
- if off_payload and not (product_name or categories_tags or ingredients_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  data = self._safe_parse(off_payload) or {}
214
  product_name = data.get("product_name") or ""
215
  categories_tags = self._to_list(data.get("categories_tags"))
216
  ingredients_text= data.get("ingredients_text") or ""
217
-
218
- raw_cats = " ".join(self._to_list(categories_tags))
219
- text = " ".join([t for t in [
220
- self._normalize_txt(product_name or ""),
221
- self._normalize_txt(raw_cats),
222
- self._normalize_txt(ingredients_text or "")
223
- ] if t])
 
 
 
 
224
 
225
  c=[]
226
- for rx,(code,score,why) in self.RULES:
227
- if rx.search(text): c.append({"code":code,"why":why,"score":score})
228
 
229
- return {"candidates": c}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  # ---- SemSim ----
232
  class SemSim(Tool):
@@ -342,7 +447,7 @@ class Resolve(Tool):
342
  from typing import Dict, Any
343
  bucket: Dict[str, Dict[str, Any]] = {}
344
 
345
- # Tolérance : si on passe directement une liste de candidats [{...}, {...}]
346
  if isinstance(json_lists, list) and json_lists and isinstance(json_lists[0], dict) and "code" in json_lists[0]:
347
  json_lists = [{"candidates": json_lists}]
348
 
@@ -361,13 +466,24 @@ class Resolve(Tool):
361
  bucket[code]["score"] = max(bucket[code]["score"], score)
362
  bucket[code]["votes"] += 1
363
  if why: bucket[code]["evidences"].append(why)
 
364
  for v in bucket.values():
365
  v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
 
366
  ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
367
- if not ranked: return {"final": None, "alternatives": [], "explanation":"Aucun candidat"}
368
- final = ranked[0]; alts = ranked[1:1+max(0,int(topn))]
 
 
 
 
 
 
 
369
  exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
370
- return {"final": final, "alternatives": alts, "explanation": exp}
 
 
371
 
372
  # ---- build_agent ----
373
  def build_agent(model_id: str | None = None) -> CodeAgent:
 
59
  requirements = ["requests"]
60
 
61
  def forward(self, ean: str):
62
+ import re, json
63
  from requests.adapters import HTTPAdapter
64
  try:
65
  from urllib3.util.retry import Retry
 
92
  urls = [
93
  f"https://world.openfoodfacts.org/api/v0/product/{code}.json",
94
  "https://world.openfoodfacts.org/api/v2/product/"
95
+ f"{code}?lc=fr&fields=code,product_name,product_name_fr,brands,labels_tags,"
96
+ "categories_tags,categories_tags_fr,categories_hierarchy,ingredients,ingredients_text,"
97
+ "ingredients_text_fr,ingredients_text_en,allergens,allergens_tags,traces,traces_tags,"
98
  "stores,status,status_verbose",
99
  f"https://world.openfoodfacts.net/api/v0/product/{code}.json",
100
  ]
 
112
  if status == 1 or product:
113
  p = product or {}
114
  product_name = _first(p.get("product_name_fr"), p.get("product_name"))
115
+
116
  categories_tags = p.get("categories_tags_fr") or p.get("categories_tags") or p.get("categories")
117
  categories_tags = _to_list(categories_tags)
118
+ categories_hierarchy = _to_list(p.get("categories_hierarchy"))
119
+
120
+ # Ingrédients : texte + liste structurée
121
+ ingredients_text = _first(p.get("ingredients_text_fr"), p.get("ingredients_text_en"), p.get("ingredients_text"))
122
+ ingredients_list = []
123
+ if isinstance(p.get("ingredients"), list):
124
+ for it in p["ingredients"]:
125
+ txt = it.get("text") or it.get("id") or ""
126
+ if txt: ingredients_list.append(str(txt).strip())
127
+
128
+ allergens = _first(p.get("allergens"), None)
129
+ allergens_tags = _to_list(p.get("allergens_tags"))
130
+ traces = _first(p.get("traces"), None) # ex: "lait, noisettes"
131
+ traces_tags = _to_list(p.get("traces_tags"))
132
+ labels_tags = _to_list(p.get("labels_tags"))
133
+
134
  brands = _first(p.get("brands"), None)
135
  stores = _first(p.get("stores"), None)
136
+
137
  return {
138
  "ok": True, "status": status, "status_verbose": data.get("status_verbose"),
139
  "code": code, "used_url": u,
140
  "product_name": product_name,
141
  "categories_tags": categories_tags,
142
+ "categories_hierarchy": categories_hierarchy,
143
  "ingredients_text": ingredients_text,
144
+ "ingredients_list": ingredients_list,
145
+ "allergens": allergens,
146
+ "allergens_tags": allergens_tags,
147
+ "traces": traces,
148
+ "traces_tags": traces_tags,
149
+ "labels_tags": labels_tags,
150
  "brands": brands, "brands_list": _to_list(brands),
151
  "stores": stores, "stores_list": _to_list(stores),
152
+ # Entrées déjà prêtes pour l’étape 3
153
  "step3_inputs": {
154
  "product_name": product_name,
155
  "categories_tags": categories_tags,
156
  "ingredients_text": ingredients_text,
157
+ "ingredients_list": ingredients_list,
158
+ "traces": traces,
159
+ "traces_tags": traces_tags,
160
  },
161
  }
162
  except Exception as e:
 
164
 
165
  return {"ok": False, "status": 0, "code": code, "error": last_err or "not found"}
166
 
167
+
168
  # ---- RegexCOICOP ----
169
  class RegexCOICOP(Tool):
170
  name, description = "coicop_regex_rules", "Règles regex → candidats COICOP."
 
204
  "product_name": {"type":"string", "description":"Nom produit OFF (fr/en).", "nullable": True},
205
  "categories_tags": {"type":"array", "description":"Liste OFF categories_tags.", "nullable": True},
206
  "ingredients_text":{"type":"string","description":"Texte ingrédients.", "nullable": True},
207
+ "ingredients_list":{"type":"array", "description":"Liste structurée des ingrédients (strings).", "nullable": True},
208
+ "traces": {"type":"string","description":"Champ traces (fr).", "nullable": True},
209
+ "traces_tags": {"type":"array", "description":"Tags de traces.", "nullable": True},
210
  "off_payload": {"type":"string","description":"Chaîne JSON brute renvoyée par l'étape 2.", "nullable": True},
211
  }
212
  output_type="object"
 
219
  s = "".join(c for c in unicodedata.normalize("NFD", s) if unicodedata.category(c) != "Mn")
220
  s = re.sub(r"[^A-Z0-9% ]+", " ", s)
221
  return re.sub(r"\s+", " ", s).strip()
222
+
223
  def _to_list(self, x):
224
  import re
225
  if x is None: return []
226
  if isinstance(x, list): return [str(t).strip() for t in x if str(t).strip()]
227
  if isinstance(x, str): return [p.strip() for p in re.split(r"[,\|;]", x) if p.strip()]
228
  return [str(x).strip()]
229
+
230
  def _safe_parse(self, s):
231
  try: return self._json.loads(s)
232
  except Exception:
233
  try: return self._ast.literal_eval(s)
234
  except Exception: return {}
235
 
236
+ # --- mots-clés par familles
237
+ SOFT = _re.compile(r"\b(CAMEMBERT|BRIE|COULOMMIERS|BLUE CHEESE|ROQUEFORT|GORGONZOLA|MUNSTER|REBLOCHON)\b")
238
+ PRESS = _re.compile(r"\b(EMMENTAL|COMTE|CANTAL|MIMOLETTE|GOUDA|EDAM|BEAUFORT|ABONDANCE|SALERS|TOMME|TOME)\b")
239
+ GOAT = _re.compile(r"\b(CHEVRE|CH[ÈE]VRE|STE MAURE|CROTTIN|BUCHE|BUCHETTE|PICODON|PELARDON|BANON)\b")
240
+ PROC = _re.compile(r"\b(FONDU|FONDUES?|RAPE|RÂPE|PORTIONS?|KIRI|VACHE QUI RIT|CARRE FRAIS|CARR[ÉE] FRAIS|TOASTINETTES?)\b")
241
+ GENERIC_FROMAGE = _re.compile(r"\bFROMAGE[S]?\b")
242
+ CREMEUX = _re.compile(r"\bCR[ÉE]MEUX\b")
243
+
244
+ # --- suppression des clauses "traces"
245
+ _TRACES_BLOCK = _re.compile(
246
+ r"(PEUT\s+CONTENIR\s+DES\s+TRACES\s+DE\s+[^.;\)\]]+)|"
247
+ r"(MAY\s+CONTAIN\s+TRACES\s+OF\s+[^.;\)\]]+)|"
248
+ r"(\bTRACES?\s+DE\s+[^.;\)\]]+)",
249
+ _re.I
250
+ )
251
+
252
+ def _without_traces(self, s: str) -> str:
253
+ if not s: return ""
254
+ return self._TRACES_BLOCK.sub(" ", s)
255
+
256
+ def _mk(self, code, base, why, source):
257
+ # petit lissage par source
258
+ boost = {"name":0.05, "cat":0.04, "ing_no_traces":0.03, "ing":0.01}.get(source, 0.0)
259
+ return {"code": code, "score": round(base+boost, 4), "why": f"{why} (source:{source})"}
260
+
261
+ def forward(self, product_name=None, categories_tags=None, ingredients_text=None,
262
+ ingredients_list=None, traces=None, traces_tags=None, off_payload=None):
263
+
264
+ # Hydrate depuis off_payload si besoin
265
+ if off_payload and not (product_name or categories_tags or ingredients_text or ingredients_list or traces or traces_tags):
266
  data = self._safe_parse(off_payload) or {}
267
  product_name = data.get("product_name") or ""
268
  categories_tags = self._to_list(data.get("categories_tags"))
269
  ingredients_text= data.get("ingredients_text") or ""
270
+ ingredients_list= self._to_list(data.get("ingredients_list"))
271
+ traces = data.get("traces") or ""
272
+ traces_tags = self._to_list(data.get("traces_tags"))
273
+
274
+ name = self._normalize_txt(product_name or "")
275
+ cats = self._normalize_txt(" ".join(self._to_list(categories_tags)))
276
+ ingt = self._normalize_txt(ingredients_text or "")
277
+ ingt_no_tr = self._normalize_txt(self._without_traces(ingredients_text or ""))
278
+ ing_list = [self._normalize_txt(x) for x in self._to_list(ingredients_list)]
279
+ ing_join = " ".join(ing_list)
280
+ ing_join_no_tr = self._normalize_txt(self._without_traces(ing_join))
281
 
282
  c=[]
 
 
283
 
284
+ # 1) Nom produit et catégories (forts)
285
+ if self.SOFT.search(name) or self.SOFT.search(cats):
286
+ c.append(self._mk("01.1.4.5.2", 0.90, "OFF: pâte molle/persillée", "name" if self.SOFT.search(name) else "cat"))
287
+ if self.PRESS.search(name) or self.PRESS.search(cats):
288
+ c.append(self._mk("01.1.4.5.3", 0.87, "OFF: pâte pressée", "name" if self.PRESS.search(name) else "cat"))
289
+ if self.GOAT.search(name) or self.GOAT.search(cats):
290
+ c.append(self._mk("01.1.4.5.4", 0.88, "OFF: chèvre", "name" if self.GOAT.search(name) else "cat"))
291
+ if self.PROC.search(name) or self.PROC.search(cats):
292
+ c.append(self._mk("01.1.4.5.5", 0.86, "OFF: fondu/râpé/portions", "name" if self.PROC.search(name) else "cat"))
293
+
294
+ # 2) Ingrédients – version SANS "traces" (moyen)
295
+ if self.SOFT.search(ingt_no_tr) or self.SOFT.search(ing_join_no_tr):
296
+ c.append(self._mk("01.1.4.5.2", 0.84, "Ingrédients (sans traces): pâte molle/persillée", "ing_no_traces"))
297
+ if self.PRESS.search(ingt_no_tr) or self.PRESS.search(ing_join_no_tr):
298
+ c.append(self._mk("01.1.4.5.3", 0.82, "Ingrédients (sans traces): pâte pressée", "ing_no_traces"))
299
+ if self.GOAT.search(ingt_no_tr) or self.GOAT.search(ing_join_no_tr):
300
+ # ⚠️ chèvre uniquement s'il n'est PAS dans des traces
301
+ c.append(self._mk("01.1.4.5.4", 0.83, "Ingrédients (sans traces): chèvre", "ing_no_traces"))
302
+ if self.PROC.search(ingt_no_tr) or self.PROC.search(ing_join_no_tr):
303
+ c.append(self._mk("01.1.4.5.5", 0.80, "Ingrédients (sans traces): fondu/râpé/portions", "ing_no_traces"))
304
+
305
+ # 3) Ingrédients bruts (faible, exemple ne déclenche pas chèvre seul)
306
+ if self.SOFT.search(ingt) or self.SOFT.search(ing_join):
307
+ c.append(self._mk("01.1.4.5.2", 0.78, "Ingrédients: pâte molle/persillée", "ing"))
308
+ if self.PRESS.search(ingt) or self.PRESS.search(ing_join):
309
+ c.append(self._mk("01.1.4.5.3", 0.76, "Ingrédients: pâte pressée", "ing"))
310
+ if self.PROC.search(ingt) or self.PROC.search(ing_join):
311
+ c.append(self._mk("01.1.4.5.5", 0.74, "Ingrédients: fondu/râpé/portions", "ing"))
312
+ # NB: volontairement pas de déclencheur chèvre ici (pour éviter les faux positifs via 'traces').
313
+
314
+ # 4) Génériques
315
+ if not c and (self.GENERIC_FROMAGE.search(name) or self.GENERIC_FROMAGE.search(cats)):
316
+ c.append(self._mk("01.1.4.5", 0.60, "OFF: générique fromage", "cat"))
317
+ if not c and self.CREMEUX.search(name):
318
+ c.append(self._mk("01.1.4.5.1", 0.58, "OFF: crémeux", "name"))
319
+
320
+ # Dédupliquer en gardant le meilleur score par code + agréger les justifs
321
+ bucket={}
322
+ for ci in c:
323
+ code=ci["code"]
324
+ if code not in bucket:
325
+ bucket[code] = {**ci, "why_list":[ci["why"]]}
326
+ else:
327
+ if ci["score"]>bucket[code]["score"]:
328
+ bucket[code].update({"score":ci["score"], "why":ci["why"]})
329
+ bucket[code]["why_list"].append(ci["why"])
330
+
331
+ ranked = sorted(bucket.values(), key=lambda x: x["score"], reverse=True)
332
+ # Retourne AU PLUS 3 candidats (les mieux classés) ; moins de 3 si moins de codes distincts ont été trouvés
333
+ return {"candidates": ranked[:max(3, len(ranked))][:3]}
334
+
335
 
336
  # ---- SemSim ----
337
  class SemSim(Tool):
 
447
  from typing import Dict, Any
448
  bucket: Dict[str, Dict[str, Any]] = {}
449
 
450
+ # Tolérance liste directe
451
  if isinstance(json_lists, list) and json_lists and isinstance(json_lists[0], dict) and "code" in json_lists[0]:
452
  json_lists = [{"candidates": json_lists}]
453
 
 
466
  bucket[code]["score"] = max(bucket[code]["score"], score)
467
  bucket[code]["votes"] += 1
468
  if why: bucket[code]["evidences"].append(why)
469
+
470
  for v in bucket.values():
471
  v["score_final"] = v["score"] + 0.05*(v["votes"]-1)
472
+
473
  ranked = sorted(bucket.values(), key=lambda x: x["score_final"], reverse=True)
474
+ if not ranked:
475
+ return {"final": None, "alternatives": [], "candidates_top": [], "explanation":"Aucun candidat"}
476
+
477
+ # Top fusionné : au moins 3
478
+ min_top = max(3, topn if isinstance(topn, int) and topn>0 else 3)
479
+ top_candidates = ranked[:min_top]
480
+
481
+ final = ranked[0]
482
+ alts = ranked[1:1+min_top-1] # alternatives complémentaires pour arriver à min_top au total
483
  exp = f"Choix {final['code']} (score {final['score_final']:.2f}) – votes={final['votes']} – raisons: {', '.join(sorted(set(final['evidences'])))}"
484
+
485
+ return {"final": final, "alternatives": alts, "candidates_top": top_candidates, "explanation": exp}
486
+
487
 
488
  # ---- build_agent ----
489
  def build_agent(model_id: str | None = None) -> CodeAgent: