Spaces:
Sleeping
Sleeping
Commit ·
e2f537d
1
Parent(s): 8f5ca4d
use glfi ecoalim (#17)
Browse files- use synonym data (ee99e09542c3ded23092d20cf7588028c23d9b92)
- src/data/feedipedia_feeds.json +0 -0
- src/data_loader.py +1 -1
- src/flowchart_engine.py +66 -12
- src/llm_service.py +66 -0
- src/synonym_service.py +264 -0
src/data/feedipedia_feeds.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/data_loader.py
CHANGED
|
@@ -272,7 +272,7 @@ def search_gfli(
|
|
| 272 |
# Filtrage strict : si un pays est demandé, ne retourner QUE les résultats de ce pays
|
| 273 |
mask = mask & mask_country
|
| 274 |
|
| 275 |
-
logging.info("Masked df", df[mask].head())
|
| 276 |
return df[mask].copy()
|
| 277 |
|
| 278 |
|
|
|
|
| 272 |
# Filtrage strict : si un pays est demandé, ne retourner QUE les résultats de ce pays
|
| 273 |
mask = mask & mask_country
|
| 274 |
|
| 275 |
+
logging.info("Masked df: %s", df[mask].head())
|
| 276 |
return df[mask].copy()
|
| 277 |
|
| 278 |
|
src/flowchart_engine.py
CHANGED
|
@@ -6,6 +6,7 @@ en fonction de la provenance, du niveau de transformation, et des données dispo
|
|
| 6 |
"""
|
| 7 |
from __future__ import annotations
|
| 8 |
|
|
|
|
| 9 |
from dataclasses import dataclass, field
|
| 10 |
from typing import List, Optional, Tuple
|
| 11 |
|
|
@@ -13,6 +14,10 @@ import config
|
|
| 13 |
import data_loader
|
| 14 |
import llm_service
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
@dataclass
|
| 18 |
class StepLog:
|
|
@@ -93,7 +98,9 @@ def _is_european(pays: Optional[str]) -> bool:
|
|
| 93 |
return True
|
| 94 |
# Try to get ISO from French mapping, or use uppercase input as fallback
|
| 95 |
pays_iso = config.PAYS_FR_TO_ISO.get(n, pays.strip().upper())
|
| 96 |
-
|
|
|
|
|
|
|
| 97 |
|
| 98 |
|
| 99 |
def _get_country_iso(pays: Optional[str]) -> Optional[str]:
|
|
@@ -207,7 +214,7 @@ def _resolve_node_4(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 207 |
if eco_worst:
|
| 208 |
val, nom, src = eco_worst
|
| 209 |
result.impact_kg_co2_eq = val
|
| 210 |
-
|
| 211 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 212 |
result.source_db = src
|
| 213 |
result.intrant_utilise = nom
|
|
@@ -301,7 +308,7 @@ def _resolve_node_8(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 301 |
if eco_result:
|
| 302 |
val = eco_result["valeur_kg_co2_eq"]
|
| 303 |
result.impact_kg_co2_eq = val
|
| 304 |
-
|
| 305 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 306 |
result.source_db = eco_result["source"]
|
| 307 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
@@ -329,7 +336,7 @@ def _resolve_node_8(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 329 |
if eco_smart:
|
| 330 |
val = eco_smart["valeur_kg_co2_eq"]
|
| 331 |
result.impact_kg_co2_eq = val
|
| 332 |
-
|
| 333 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 334 |
result.source_db = eco_smart["source"]
|
| 335 |
result.intrant_utilise = eco_smart["nom_intrant"]
|
|
@@ -401,6 +408,7 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
|
|
| 401 |
"""
|
| 402 |
country_iso = _get_country_iso(pays_production)
|
| 403 |
|
|
|
|
| 404 |
result.actions_appliquees.append(f"1. Recherche dans GFLI pour le pays {pays_production} (ISO: {country_iso})")
|
| 405 |
gfli_result = llm_service.smart_search_gfli(matiere, country_iso=country_iso)
|
| 406 |
if gfli_result:
|
|
@@ -413,11 +421,15 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
|
|
| 413 |
result.match_exact = gfli_result["match_exact"]
|
| 414 |
result.justification_alternative = gfli_result.get("justification")
|
| 415 |
result.actions_appliquees.append(f" → Trouvé dans GFLI : {gfli_result['nom_intrant']}")
|
|
|
|
| 416 |
return result
|
|
|
|
|
|
|
| 417 |
|
| 418 |
# Étape 2 : RER ou GLO
|
| 419 |
is_eu = _is_european(pays_production)
|
| 420 |
if is_eu:
|
|
|
|
| 421 |
result.actions_appliquees.append("2. Pays européen → Recherche Mix Européen (RER) dans GFLI")
|
| 422 |
rer = llm_service.smart_search_gfli(matiere, country_iso="RER")
|
| 423 |
if rer:
|
|
@@ -430,8 +442,13 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
|
|
| 430 |
result.match_exact = rer["match_exact"]
|
| 431 |
result.justification_alternative = rer.get("justification")
|
| 432 |
result.actions_appliquees.append(f" → Trouvé RER : {rer['nom_intrant']}")
|
|
|
|
| 433 |
return result
|
|
|
|
|
|
|
|
|
|
| 434 |
else:
|
|
|
|
| 435 |
result.actions_appliquees.append("2. Pays hors Europe → Recherche Mix Monde (GLO) dans GFLI")
|
| 436 |
glo = llm_service.smart_search_gfli(matiere, country_iso="GLO")
|
| 437 |
if glo:
|
|
@@ -443,24 +460,34 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
|
|
| 443 |
result.intrant_utilise = glo["nom_intrant"]
|
| 444 |
result.match_exact = glo["match_exact"]
|
| 445 |
result.justification_alternative = glo.get("justification")
|
|
|
|
| 446 |
result.actions_appliquees.append(f" → Trouvé GLO : {glo['nom_intrant']}")
|
| 447 |
return result
|
|
|
|
|
|
|
|
|
|
| 448 |
|
|
|
|
| 449 |
result.actions_appliquees.append("3. Recherche dans ECOALIM")
|
| 450 |
eco_result = llm_service.smart_search_ecoalim(matiere, pays_production=pays_production)
|
| 451 |
if eco_result:
|
| 452 |
val = eco_result["valeur_kg_co2_eq"]
|
| 453 |
result.impact_kg_co2_eq = val
|
| 454 |
-
|
| 455 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 456 |
result.source_db = eco_result["source"]
|
| 457 |
result.intrant_utilise = eco_result["nom_intrant"]
|
| 458 |
result.match_exact = eco_result["match_exact"]
|
| 459 |
result.justification_alternative = eco_result.get("justification")
|
| 460 |
result.actions_appliquees.append(f" → Trouvé dans ECOALIM : {eco_result['nom_intrant']}")
|
|
|
|
| 461 |
return result
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
# Étape 4 : Fallback - Proposer des matières alternatives
|
|
|
|
| 464 |
result.actions_appliquees.append(f"4. Fallback - Recherche via LLM de 4 alternatives ({pays_production})")
|
| 465 |
alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint=pays_production)
|
| 466 |
|
|
@@ -526,7 +553,7 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 526 |
if eco_result:
|
| 527 |
val = eco_result["valeur_kg_co2_eq"]
|
| 528 |
result.impact_kg_co2_eq = val
|
| 529 |
-
|
| 530 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 531 |
result.source_db = eco_result["source"]
|
| 532 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
@@ -548,6 +575,8 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 548 |
result.match_exact = gfli_result["match_exact"]
|
| 549 |
result.justification_alternative = gfli_result.get("justification")
|
| 550 |
result.actions_appliquees.append(f" → Trouvé dans GFLI : {gfli_result['nom_intrant']}")
|
|
|
|
|
|
|
| 551 |
return result
|
| 552 |
|
| 553 |
# Étape 3 : LLM process le plus proche
|
|
@@ -556,7 +585,7 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 556 |
if eco_smart:
|
| 557 |
val = eco_smart["valeur_kg_co2_eq"]
|
| 558 |
result.impact_kg_co2_eq = val
|
| 559 |
-
|
| 560 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 561 |
result.source_db = eco_smart["source"]
|
| 562 |
result.intrant_utilise = eco_smart["nom_intrant"]
|
|
@@ -606,13 +635,16 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 606 |
# Utiliser la combined comme valeur principale
|
| 607 |
val = alt["impact"]
|
| 608 |
result.impact_kg_co2_eq = val
|
| 609 |
-
|
| 610 |
-
|
|
|
|
| 611 |
result.source_db = alt["source"]
|
| 612 |
result.intrant_utilise = alt["name"]
|
| 613 |
result.match_exact = False
|
| 614 |
result.justification_alternative = alt["reasoning"]
|
| 615 |
-
result.actions_appliquees.append(f" → Matière proposée (combo) : {alt['name']} = {val:.
|
|
|
|
|
|
|
| 616 |
return result
|
| 617 |
|
| 618 |
result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, France/France)."
|
|
@@ -662,7 +694,7 @@ def _resolve_node_11(matiere: str, result: CarbonResult) -> CarbonResult:
|
|
| 662 |
if eco_result:
|
| 663 |
val = eco_result["valeur_kg_co2_eq"]
|
| 664 |
result.impact_kg_co2_eq = val
|
| 665 |
-
|
| 666 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 667 |
result.source_db = eco_result["source"]
|
| 668 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
@@ -792,7 +824,7 @@ def _resolve_node_12(matiere: str, pays_transformation: str, result: CarbonResul
|
|
| 792 |
if eco_result:
|
| 793 |
val = eco_result["valeur_kg_co2_eq"]
|
| 794 |
result.impact_kg_co2_eq = val
|
| 795 |
-
|
| 796 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 797 |
result.source_db = eco_result["source"]
|
| 798 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
@@ -1647,6 +1679,9 @@ def evaluate_carbon_impact(
|
|
| 1647 |
Returns:
|
| 1648 |
CarbonResult avec toutes les informations
|
| 1649 |
"""
|
|
|
|
|
|
|
|
|
|
| 1650 |
result = CarbonResult(
|
| 1651 |
matiere_premiere=matiere_premiere,
|
| 1652 |
pays_production=pays_production,
|
|
@@ -1662,6 +1697,7 @@ def evaluate_carbon_impact(
|
|
| 1662 |
# Les minéraux ne suivent pas la classification brut/transformé
|
| 1663 |
result.classification = "mineral"
|
| 1664 |
result.classification_justification = "MP minérale / micro-ingrédient / additif"
|
|
|
|
| 1665 |
result.parcours.append(StepLog(
|
| 1666 |
node_id="classification",
|
| 1667 |
question="Type de matière première",
|
|
@@ -1672,6 +1708,7 @@ def evaluate_carbon_impact(
|
|
| 1672 |
result.classification = classification.get("classification", "brut")
|
| 1673 |
result.classification_justification = classification.get("justification", "")
|
| 1674 |
is_transformed = result.classification == "transforme"
|
|
|
|
| 1675 |
|
| 1676 |
result.parcours.append(StepLog(
|
| 1677 |
node_id="classification",
|
|
@@ -1682,6 +1719,7 @@ def evaluate_carbon_impact(
|
|
| 1682 |
# -----------------------------------------------------------------------
|
| 1683 |
# Dispatch selon le type de MP
|
| 1684 |
# -----------------------------------------------------------------------
|
|
|
|
| 1685 |
if type_mp == "soja":
|
| 1686 |
result = _evaluate_soja(matiere_premiere, pays_production, pays_transformation, result)
|
| 1687 |
|
|
@@ -1695,6 +1733,7 @@ def evaluate_carbon_impact(
|
|
| 1695 |
|
| 1696 |
if not provenance_connue:
|
| 1697 |
# node_1 → Non → node_provenance_inconnue
|
|
|
|
| 1698 |
result.parcours.append(StepLog(
|
| 1699 |
node_id="node_1",
|
| 1700 |
question="Connaissez-vous l'endroit ou l'intrant a ete cultive ou produit ?",
|
|
@@ -1715,10 +1754,12 @@ def evaluate_carbon_impact(
|
|
| 1715 |
answer="Intrant transforme ou coproduit",
|
| 1716 |
))
|
| 1717 |
result.node_resultat = "node_base_gfli_defaut"
|
|
|
|
| 1718 |
result = _resolve_node_4(matiere_premiere, result)
|
| 1719 |
|
| 1720 |
else:
|
| 1721 |
# node_1 → Oui → node_transformation
|
|
|
|
| 1722 |
result.parcours.append(StepLog(
|
| 1723 |
node_id="node_1",
|
| 1724 |
question="Connaissez-vous l'endroit ou l'intrant a ete cultive ou produit ?",
|
|
@@ -1735,6 +1776,7 @@ def evaluate_carbon_impact(
|
|
| 1735 |
|
| 1736 |
if _is_france(pays_production):
|
| 1737 |
# node_localisation_brut → Oui (France) → node_base_ecoalim
|
|
|
|
| 1738 |
result.parcours.append(StepLog(
|
| 1739 |
node_id="node_localisation_brut",
|
| 1740 |
question="L'intrant brut est-il cultive en France ?",
|
|
@@ -1744,6 +1786,7 @@ def evaluate_carbon_impact(
|
|
| 1744 |
result = _resolve_node_8(matiere_premiere, result)
|
| 1745 |
else:
|
| 1746 |
# node_localisation_brut → Non → node_base_gfli_pays
|
|
|
|
| 1747 |
result.parcours.append(StepLog(
|
| 1748 |
node_id="node_localisation_brut",
|
| 1749 |
question="L'intrant brut est-il cultive en France ?",
|
|
@@ -1793,13 +1836,16 @@ def evaluate_carbon_impact(
|
|
| 1793 |
# ------------------------------------------------------------------
|
| 1794 |
# Post-processing : normaliser les unités (t CO2 eq / t produit)
|
| 1795 |
# ------------------------------------------------------------------
|
|
|
|
| 1796 |
if result.impact_kg_co2_eq is not None and result.unite_source:
|
| 1797 |
if "tonne" in result.unite_source:
|
| 1798 |
# GFLI : kg CO2 eq / tonne -> t CO2 eq / t
|
| 1799 |
result.impact_tonne_co2_eq = result.impact_kg_co2_eq / 1000.0
|
|
|
|
| 1800 |
else:
|
| 1801 |
# EcoALIM : kg CO2 eq / kg -> t CO2 eq / t (même valeur numérique)
|
| 1802 |
result.impact_tonne_co2_eq = result.impact_kg_co2_eq
|
|
|
|
| 1803 |
|
| 1804 |
# ------------------------------------------------------------------
|
| 1805 |
# Post-processing : collecter les candidats alternatifs
|
|
@@ -1833,6 +1879,14 @@ def evaluate_carbon_impact(
|
|
| 1833 |
f"'{result.matiere_premiere}' (matière la plus proche dans {result.source_db})."
|
| 1834 |
)
|
| 1835 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1836 |
return result
|
| 1837 |
|
| 1838 |
|
|
|
|
| 6 |
"""
|
| 7 |
from __future__ import annotations
|
| 8 |
|
| 9 |
+
import logging
|
| 10 |
from dataclasses import dataclass, field
|
| 11 |
from typing import List, Optional, Tuple
|
| 12 |
|
|
|
|
| 14 |
import data_loader
|
| 15 |
import llm_service
|
| 16 |
|
| 17 |
+
# Configure logging for flowchart engine
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
+
logger.setLevel(logging.DEBUG)
|
| 20 |
+
|
| 21 |
|
| 22 |
@dataclass
|
| 23 |
class StepLog:
|
|
|
|
| 98 |
return True
|
| 99 |
# Try to get ISO from French mapping, or use uppercase input as fallback
|
| 100 |
pays_iso = config.PAYS_FR_TO_ISO.get(n, pays.strip().upper())
|
| 101 |
+
is_eu = pays_iso in config.EUROPEAN_COUNTRIES_ISO
|
| 102 |
+
logger.debug(f"_is_european({pays}) → {is_eu} (ISO: {pays_iso})")
|
| 103 |
+
return is_eu
|
| 104 |
|
| 105 |
|
| 106 |
def _get_country_iso(pays: Optional[str]) -> Optional[str]:
|
|
|
|
| 214 |
if eco_worst:
|
| 215 |
val, nom, src = eco_worst
|
| 216 |
result.impact_kg_co2_eq = val
|
| 217 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 218 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 219 |
result.source_db = src
|
| 220 |
result.intrant_utilise = nom
|
|
|
|
| 308 |
if eco_result:
|
| 309 |
val = eco_result["valeur_kg_co2_eq"]
|
| 310 |
result.impact_kg_co2_eq = val
|
| 311 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 312 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 313 |
result.source_db = eco_result["source"]
|
| 314 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
|
|
| 336 |
if eco_smart:
|
| 337 |
val = eco_smart["valeur_kg_co2_eq"]
|
| 338 |
result.impact_kg_co2_eq = val
|
| 339 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 340 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 341 |
result.source_db = eco_smart["source"]
|
| 342 |
result.intrant_utilise = eco_smart["nom_intrant"]
|
|
|
|
| 408 |
"""
|
| 409 |
country_iso = _get_country_iso(pays_production)
|
| 410 |
|
| 411 |
+
logger.info(f"Node 9: Étape 1 - Recherche '{matiere}' dans GFLI pour {pays_production} (ISO: {country_iso})")
|
| 412 |
result.actions_appliquees.append(f"1. Recherche dans GFLI pour le pays {pays_production} (ISO: {country_iso})")
|
| 413 |
gfli_result = llm_service.smart_search_gfli(matiere, country_iso=country_iso)
|
| 414 |
if gfli_result:
|
|
|
|
| 421 |
result.match_exact = gfli_result["match_exact"]
|
| 422 |
result.justification_alternative = gfli_result.get("justification")
|
| 423 |
result.actions_appliquees.append(f" → Trouvé dans GFLI : {gfli_result['nom_intrant']}")
|
| 424 |
+
logger.info(f"✓ Trouvé pays spécifique: {gfli_result['nom_intrant']} = {val:.2f} kg CO2/t")
|
| 425 |
return result
|
| 426 |
+
else:
|
| 427 |
+
logger.warning(f"✗ Pays spécifique ({country_iso}) non trouvé pour '{matiere}'")
|
| 428 |
|
| 429 |
# Étape 2 : RER ou GLO
|
| 430 |
is_eu = _is_european(pays_production)
|
| 431 |
if is_eu:
|
| 432 |
+
logger.info(f"Node 9: Pays européen ({pays_production}) → Recherche RER")
|
| 433 |
result.actions_appliquees.append("2. Pays européen → Recherche Mix Européen (RER) dans GFLI")
|
| 434 |
rer = llm_service.smart_search_gfli(matiere, country_iso="RER")
|
| 435 |
if rer:
|
|
|
|
| 442 |
result.match_exact = rer["match_exact"]
|
| 443 |
result.justification_alternative = rer.get("justification")
|
| 444 |
result.actions_appliquees.append(f" → Trouvé RER : {rer['nom_intrant']}")
|
| 445 |
+
logger.info(f"✓ Trouvé RER: {rer['nom_intrant']} = {val:.2f} kg CO2/t")
|
| 446 |
return result
|
| 447 |
+
else:
|
| 448 |
+
logger.warning(f"✗ RER non trouvé pour '{matiere}' → Fallback vers ECOALIM")
|
| 449 |
+
result.actions_appliquees.append(f" → RER non trouvé pour '{matiere}'")
|
| 450 |
else:
|
| 451 |
+
logger.info(f"Node 9: Pays NON européen ({pays_production}) → Recherche GLO")
|
| 452 |
result.actions_appliquees.append("2. Pays hors Europe → Recherche Mix Monde (GLO) dans GFLI")
|
| 453 |
glo = llm_service.smart_search_gfli(matiere, country_iso="GLO")
|
| 454 |
if glo:
|
|
|
|
| 460 |
result.intrant_utilise = glo["nom_intrant"]
|
| 461 |
result.match_exact = glo["match_exact"]
|
| 462 |
result.justification_alternative = glo.get("justification")
|
| 463 |
+
logger.info(f"✓ Trouvé GLO: {glo['nom_intrant']} = {val:.2f} kg CO2/t")
|
| 464 |
result.actions_appliquees.append(f" → Trouvé GLO : {glo['nom_intrant']}")
|
| 465 |
return result
|
| 466 |
+
else:
|
| 467 |
+
logger.warning(f"✗ GLO non trouvé pour '{matiere}' → Fallback vers ECOALIM")
|
| 468 |
+
result.actions_appliquees.append(f" → GLO non trouvé pour '{matiere}'")
|
| 469 |
|
| 470 |
+
logger.info(f"Node 9: Étape 3 - Recherche ECOALIM comme fallback")
|
| 471 |
result.actions_appliquees.append("3. Recherche dans ECOALIM")
|
| 472 |
eco_result = llm_service.smart_search_ecoalim(matiere, pays_production=pays_production)
|
| 473 |
if eco_result:
|
| 474 |
val = eco_result["valeur_kg_co2_eq"]
|
| 475 |
result.impact_kg_co2_eq = val
|
| 476 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 477 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 478 |
result.source_db = eco_result["source"]
|
| 479 |
result.intrant_utilise = eco_result["nom_intrant"]
|
| 480 |
result.match_exact = eco_result["match_exact"]
|
| 481 |
result.justification_alternative = eco_result.get("justification")
|
| 482 |
result.actions_appliquees.append(f" → Trouvé dans ECOALIM : {eco_result['nom_intrant']}")
|
| 483 |
+
logger.info(f"✓ Trouvé ECOALIM: {eco_result['nom_intrant']} = {val:.2f} kg CO2/kg")
|
| 484 |
return result
|
| 485 |
+
else:
|
| 486 |
+
logger.warning(f"✗ ECOALIM non trouvé pour '{matiere}' → Fallback vers alternatives LLM")
|
| 487 |
+
result.actions_appliquees.append(f" → ECOALIM non trouvé pour '{matiere}'")
|
| 488 |
|
| 489 |
# Étape 4 : Fallback - Proposer des matières alternatives
|
| 490 |
+
logger.info(f"Node 9: Étape 4 - Recherche d'alternatives via LLM pour '{matiere}'")
|
| 491 |
result.actions_appliquees.append(f"4. Fallback - Recherche via LLM de 4 alternatives ({pays_production})")
|
| 492 |
alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint=pays_production)
|
| 493 |
|
|
|
|
| 553 |
if eco_result:
|
| 554 |
val = eco_result["valeur_kg_co2_eq"]
|
| 555 |
result.impact_kg_co2_eq = val
|
| 556 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 557 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 558 |
result.source_db = eco_result["source"]
|
| 559 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
|
|
| 575 |
result.match_exact = gfli_result["match_exact"]
|
| 576 |
result.justification_alternative = gfli_result.get("justification")
|
| 577 |
result.actions_appliquees.append(f" → Trouvé dans GFLI : {gfli_result['nom_intrant']}")
|
| 578 |
+
# Update node to reflect that GFLI was used instead of ECOALIM
|
| 579 |
+
result.node_resultat = "node_base_gfli_fr"
|
| 580 |
return result
|
| 581 |
|
| 582 |
# Étape 3 : LLM process le plus proche
|
|
|
|
| 585 |
if eco_smart:
|
| 586 |
val = eco_smart["valeur_kg_co2_eq"]
|
| 587 |
result.impact_kg_co2_eq = val
|
| 588 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 589 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 590 |
result.source_db = eco_smart["source"]
|
| 591 |
result.intrant_utilise = eco_smart["nom_intrant"]
|
|
|
|
| 635 |
# Utiliser la combined comme valeur principale
|
| 636 |
val = alt["impact"]
|
| 637 |
result.impact_kg_co2_eq = val
|
| 638 |
+
# Note: Alternatives from GFLI are in kg CO2 eq / tonne
|
| 639 |
+
# impact_tonne_co2_eq will be set by post-processing
|
| 640 |
+
result.unite_source = "kg CO2 eq / tonne de produit"
|
| 641 |
result.source_db = alt["source"]
|
| 642 |
result.intrant_utilise = alt["name"]
|
| 643 |
result.match_exact = False
|
| 644 |
result.justification_alternative = alt["reasoning"]
|
| 645 |
+
result.actions_appliquees.append(f" → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
|
| 646 |
+
# Update node to reflect that GFLI was used (via LLM alternatives)
|
| 647 |
+
result.node_resultat = "node_base_gfli_alternatives"
|
| 648 |
return result
|
| 649 |
|
| 650 |
result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, France/France)."
|
|
|
|
| 694 |
if eco_result:
|
| 695 |
val = eco_result["valeur_kg_co2_eq"]
|
| 696 |
result.impact_kg_co2_eq = val
|
| 697 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 698 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 699 |
result.source_db = eco_result["source"]
|
| 700 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
|
|
| 824 |
if eco_result:
|
| 825 |
val = eco_result["valeur_kg_co2_eq"]
|
| 826 |
result.impact_kg_co2_eq = val
|
| 827 |
+
# Note: impact_tonne_co2_eq will be set by post-processing based on unite_source
|
| 828 |
result.unite_source = "kg CO2 eq / kg de produit"
|
| 829 |
result.source_db = eco_result["source"]
|
| 830 |
result.intrant_utilise = eco_result["nom_intrant"]
|
|
|
|
| 1679 |
Returns:
|
| 1680 |
CarbonResult avec toutes les informations
|
| 1681 |
"""
|
| 1682 |
+
logger.info(f"=== Début évaluation: {matiere_premiere} ===")
|
| 1683 |
+
logger.info(f"Type MP: {type_mp}, Pays production: {pays_production or 'inconnu'}, Pays transformation: {pays_transformation or 'N/A'}")
|
| 1684 |
+
|
| 1685 |
result = CarbonResult(
|
| 1686 |
matiere_premiere=matiere_premiere,
|
| 1687 |
pays_production=pays_production,
|
|
|
|
| 1697 |
# Les minéraux ne suivent pas la classification brut/transformé
|
| 1698 |
result.classification = "mineral"
|
| 1699 |
result.classification_justification = "MP minérale / micro-ingrédient / additif"
|
| 1700 |
+
logger.info(f"Classification: Minérale (pas de classification brut/transformé)")
|
| 1701 |
result.parcours.append(StepLog(
|
| 1702 |
node_id="classification",
|
| 1703 |
question="Type de matière première",
|
|
|
|
| 1708 |
result.classification = classification.get("classification", "brut")
|
| 1709 |
result.classification_justification = classification.get("justification", "")
|
| 1710 |
is_transformed = result.classification == "transforme"
|
| 1711 |
+
logger.info(f"Classification: {'Transformée' if is_transformed else 'Brute'} - {result.classification_justification}")
|
| 1712 |
|
| 1713 |
result.parcours.append(StepLog(
|
| 1714 |
node_id="classification",
|
|
|
|
| 1719 |
# -----------------------------------------------------------------------
|
| 1720 |
# Dispatch selon le type de MP
|
| 1721 |
# -----------------------------------------------------------------------
|
| 1722 |
+
logger.info(f"Dispatch vers logigramme: {type_mp}")
|
| 1723 |
if type_mp == "soja":
|
| 1724 |
result = _evaluate_soja(matiere_premiere, pays_production, pays_transformation, result)
|
| 1725 |
|
|
|
|
| 1733 |
|
| 1734 |
if not provenance_connue:
|
| 1735 |
# node_1 → Non → node_provenance_inconnue
|
| 1736 |
+
logger.info("Node 1: Provenance INCONNUE → Recherche valeur la plus défavorable")
|
| 1737 |
result.parcours.append(StepLog(
|
| 1738 |
node_id="node_1",
|
| 1739 |
question="Connaissez-vous l'endroit ou l'intrant a ete cultive ou produit ?",
|
|
|
|
| 1754 |
answer="Intrant transforme ou coproduit",
|
| 1755 |
))
|
| 1756 |
result.node_resultat = "node_base_gfli_defaut"
|
| 1757 |
+
logger.info("→ Résolution via Node 4 (valeur la plus défavorable)")
|
| 1758 |
result = _resolve_node_4(matiere_premiere, result)
|
| 1759 |
|
| 1760 |
else:
|
| 1761 |
# node_1 → Oui → node_transformation
|
| 1762 |
+
logger.info(f"Node 1: Provenance CONNUE - Production: {pays_production}, Transformation: {pays_transformation or 'N/A'}")
|
| 1763 |
result.parcours.append(StepLog(
|
| 1764 |
node_id="node_1",
|
| 1765 |
question="Connaissez-vous l'endroit ou l'intrant a ete cultive ou produit ?",
|
|
|
|
| 1776 |
|
| 1777 |
if _is_france(pays_production):
|
| 1778 |
# node_localisation_brut → Oui (France) → node_base_ecoalim
|
| 1779 |
+
logger.info("Intrant BRUT cultivé en FRANCE → Node 8 (ECOALIM prioritaire)")
|
| 1780 |
result.parcours.append(StepLog(
|
| 1781 |
node_id="node_localisation_brut",
|
| 1782 |
question="L'intrant brut est-il cultive en France ?",
|
|
|
|
| 1786 |
result = _resolve_node_8(matiere_premiere, result)
|
| 1787 |
else:
|
| 1788 |
# node_localisation_brut → Non → node_base_gfli_pays
|
| 1789 |
+
logger.info(f"Intrant BRUT cultivé HORS FRANCE ({pays_production}) → Node 9 (GFLI pays/RER/GLO)")
|
| 1790 |
result.parcours.append(StepLog(
|
| 1791 |
node_id="node_localisation_brut",
|
| 1792 |
question="L'intrant brut est-il cultive en France ?",
|
|
|
|
| 1836 |
# ------------------------------------------------------------------
|
| 1837 |
# Post-processing : normaliser les unités (t CO2 eq / t produit)
|
| 1838 |
# ------------------------------------------------------------------
|
| 1839 |
+
logger.info("Post-processing: Normalisation des unités")
|
| 1840 |
if result.impact_kg_co2_eq is not None and result.unite_source:
|
| 1841 |
if "tonne" in result.unite_source:
|
| 1842 |
# GFLI : kg CO2 eq / tonne -> t CO2 eq / t
|
| 1843 |
result.impact_tonne_co2_eq = result.impact_kg_co2_eq / 1000.0
|
| 1844 |
+
logger.debug(f"Conversion GFLI: {result.impact_kg_co2_eq} kg CO2/t → {result.impact_tonne_co2_eq} t CO2/t")
|
| 1845 |
else:
|
| 1846 |
# EcoALIM : kg CO2 eq / kg -> t CO2 eq / t (même valeur numérique)
|
| 1847 |
result.impact_tonne_co2_eq = result.impact_kg_co2_eq
|
| 1848 |
+
logger.debug(f"Conversion EcoALIM: {result.impact_kg_co2_eq} kg CO2/kg → {result.impact_tonne_co2_eq} t CO2/t (no numerical change)")
|
| 1849 |
|
| 1850 |
# ------------------------------------------------------------------
|
| 1851 |
# Post-processing : collecter les candidats alternatifs
|
|
|
|
| 1879 |
f"'{result.matiere_premiere}' (matière la plus proche dans {result.source_db})."
|
| 1880 |
)
|
| 1881 |
|
| 1882 |
+
# Log final result
|
| 1883 |
+
if result.erreur:
|
| 1884 |
+
logger.warning(f"=== Évaluation terminée avec ERREUR: {result.erreur} ===")
|
| 1885 |
+
else:
|
| 1886 |
+
logger.info(f"=== Évaluation terminée avec SUCCÈS ===")
|
| 1887 |
+
logger.info(f"Résultat: {result.intrant_utilise} = {result.impact_kg_co2_eq:.2f} {result.unite_source}")
|
| 1888 |
+
logger.info(f"Source: {result.source_db}, Match exact: {result.match_exact}, Node: {result.node_resultat}")
|
| 1889 |
+
|
| 1890 |
return result
|
| 1891 |
|
| 1892 |
|
src/llm_service.py
CHANGED
|
@@ -6,12 +6,16 @@ llm_service.py - Intégration avec Mistral AI pour :
|
|
| 6 |
"""
|
| 7 |
from __future__ import annotations
|
| 8 |
|
|
|
|
| 9 |
from typing import Optional
|
| 10 |
from difflib import get_close_matches
|
| 11 |
from mistralai import Mistral
|
| 12 |
|
| 13 |
import config
|
| 14 |
import data_loader
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def _get_client() -> Mistral:
|
|
@@ -339,12 +343,17 @@ def smart_search_ecoalim(
|
|
| 339 |
puis si pas de résultat, utilise le LLM pour trouver un match.
|
| 340 |
Retourne un dict avec valeur, nom, source et éventuellement justification.
|
| 341 |
"""
|
|
|
|
|
|
|
|
|
|
| 342 |
# Tentative directe
|
|
|
|
| 343 |
result = data_loader.get_ecoalim_climate_value(matiere, pays_production, pays_transformation)
|
| 344 |
if result:
|
| 345 |
val, nom, source = result
|
| 346 |
# Valider que ce n'est pas un faux positif
|
| 347 |
if data_loader.is_name_match(matiere, nom):
|
|
|
|
| 348 |
return {
|
| 349 |
"valeur_kg_co2_eq": val,
|
| 350 |
"nom_intrant": nom,
|
|
@@ -353,16 +362,19 @@ def smart_search_ecoalim(
|
|
| 353 |
"justification": None,
|
| 354 |
}
|
| 355 |
# Faux positif — on continue vers le LLM
|
|
|
|
| 356 |
|
| 357 |
# Tentative avec traduction EN->FR
|
| 358 |
matiere_fr = translate_matiere_to_french(matiere)
|
| 359 |
if matiere_fr.lower() != matiere.lower():
|
|
|
|
| 360 |
result = data_loader.get_ecoalim_climate_value(matiere_fr, pays_production, pays_transformation)
|
| 361 |
if not result:
|
| 362 |
result = data_loader.get_ecoalim_climate_value(matiere_fr)
|
| 363 |
if result:
|
| 364 |
val, nom, source = result
|
| 365 |
if data_loader.is_name_match(matiere_fr, nom):
|
|
|
|
| 366 |
return {
|
| 367 |
"valeur_kg_co2_eq": val,
|
| 368 |
"nom_intrant": nom,
|
|
@@ -370,17 +382,21 @@ def smart_search_ecoalim(
|
|
| 370 |
"match_exact": False,
|
| 371 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}'",
|
| 372 |
}
|
|
|
|
| 373 |
|
| 374 |
# Tentative via LLM
|
|
|
|
| 375 |
match_info = find_matching_name_in_db(matiere, "ECOALIM")
|
| 376 |
if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
|
| 377 |
matched_name = match_info["matched_name"]
|
|
|
|
| 378 |
result = data_loader.get_ecoalim_climate_value(matched_name, pays_production, pays_transformation)
|
| 379 |
if not result:
|
| 380 |
result = data_loader.get_ecoalim_climate_value(matched_name)
|
| 381 |
if result:
|
| 382 |
val, nom, source = result
|
| 383 |
justif = justify_alternative_value(matiere, nom, val, source)
|
|
|
|
| 384 |
return {
|
| 385 |
"valeur_kg_co2_eq": val,
|
| 386 |
"nom_intrant": nom,
|
|
@@ -389,6 +405,9 @@ def smart_search_ecoalim(
|
|
| 389 |
"justification": justif,
|
| 390 |
"llm_match_info": match_info,
|
| 391 |
}
|
|
|
|
|
|
|
|
|
|
| 392 |
return None
|
| 393 |
|
| 394 |
|
|
@@ -400,12 +419,17 @@ def smart_search_gfli(
|
|
| 400 |
Recherche intelligente dans GFLI : d'abord recherche directe,
|
| 401 |
puis traduction FR→EN, puis si pas de résultat, utilise le LLM pour trouver un match.
|
| 402 |
"""
|
|
|
|
|
|
|
|
|
|
| 403 |
# Tentative directe
|
|
|
|
| 404 |
result = data_loader.get_gfli_climate_value(matiere, country_iso)
|
| 405 |
if result:
|
| 406 |
val, nom, source = result
|
| 407 |
# Valider que ce n'est pas un faux positif (ex: "blé" → "blend")
|
| 408 |
if data_loader.is_name_match(matiere, nom):
|
|
|
|
| 409 |
return {
|
| 410 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 411 |
"nom_intrant": nom,
|
|
@@ -414,15 +438,18 @@ def smart_search_gfli(
|
|
| 414 |
"justification": None,
|
| 415 |
}
|
| 416 |
# Faux positif — on continue vers la traduction / LLM
|
|
|
|
| 417 |
|
| 418 |
# Tentative avec traduction FR→EN
|
| 419 |
matiere_en = translate_matiere_to_english(matiere)
|
| 420 |
if matiere_en.lower() != matiere.lower():
|
|
|
|
| 421 |
result = data_loader.get_gfli_climate_value(matiere_en, country_iso)
|
| 422 |
if result:
|
| 423 |
val, nom, source = result
|
| 424 |
# Traduction nécessaire → pas un match exact
|
| 425 |
if data_loader.is_name_match(matiere_en, nom):
|
|
|
|
| 426 |
return {
|
| 427 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 428 |
"nom_intrant": nom,
|
|
@@ -430,16 +457,48 @@ def smart_search_gfli(
|
|
| 430 |
"match_exact": False,
|
| 431 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_en}'",
|
| 432 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
|
| 434 |
# Tentative avec traduction EN->FR puis FR->EN (double sens)
|
| 435 |
matiere_fr = translate_matiere_to_french(matiere)
|
| 436 |
if matiere_fr.lower() != matiere.lower():
|
| 437 |
matiere_en2 = translate_matiere_to_english(matiere_fr)
|
| 438 |
if matiere_en2.lower() != matiere.lower() and matiere_en2.lower() != matiere_en.lower():
|
|
|
|
| 439 |
result = data_loader.get_gfli_climate_value(matiere_en2, country_iso)
|
| 440 |
if result:
|
| 441 |
val, nom, source = result
|
| 442 |
if data_loader.is_name_match(matiere_en2, nom):
|
|
|
|
| 443 |
return {
|
| 444 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 445 |
"nom_intrant": nom,
|
|
@@ -447,15 +506,19 @@ def smart_search_gfli(
|
|
| 447 |
"match_exact": False,
|
| 448 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}' → '{matiere_en2}'",
|
| 449 |
}
|
|
|
|
| 450 |
|
| 451 |
# Tentative via LLM
|
|
|
|
| 452 |
match_info = find_matching_name_in_db(matiere, "GFLI")
|
| 453 |
if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
|
| 454 |
matched_name = match_info["matched_name"]
|
|
|
|
| 455 |
result = data_loader.get_gfli_climate_value(matched_name, country_iso)
|
| 456 |
if result:
|
| 457 |
val, nom, source = result
|
| 458 |
justif = justify_alternative_value(matiere, nom, val, source)
|
|
|
|
| 459 |
return {
|
| 460 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 461 |
"nom_intrant": nom,
|
|
@@ -464,6 +527,9 @@ def smart_search_gfli(
|
|
| 464 |
"justification": justif,
|
| 465 |
"llm_match_info": match_info,
|
| 466 |
}
|
|
|
|
|
|
|
|
|
|
| 467 |
return None
|
| 468 |
|
| 469 |
|
|
|
|
| 6 |
"""
|
| 7 |
from __future__ import annotations
|
| 8 |
|
| 9 |
+
import logging
|
| 10 |
from typing import Optional
|
| 11 |
from difflib import get_close_matches
|
| 12 |
from mistralai import Mistral
|
| 13 |
|
| 14 |
import config
|
| 15 |
import data_loader
|
| 16 |
+
import synonym_service
|
| 17 |
+
|
| 18 |
+
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
|
| 21 |
def _get_client() -> Mistral:
|
|
|
|
| 343 |
puis si pas de résultat, utilise le LLM pour trouver un match.
|
| 344 |
Retourne un dict avec valeur, nom, source et éventuellement justification.
|
| 345 |
"""
|
| 346 |
+
pays_str = f" (production: {pays_production}, transformation: {pays_transformation})" if pays_production or pays_transformation else ""
|
| 347 |
+
logger.debug(f"smart_search_ecoalim: Recherche '{matiere}'{pays_str}")
|
| 348 |
+
|
| 349 |
# Tentative directe
|
| 350 |
+
logger.debug(f" Stratégie 1: Recherche directe pour '{matiere}'")
|
| 351 |
result = data_loader.get_ecoalim_climate_value(matiere, pays_production, pays_transformation)
|
| 352 |
if result:
|
| 353 |
val, nom, source = result
|
| 354 |
# Valider que ce n'est pas un faux positif
|
| 355 |
if data_loader.is_name_match(matiere, nom):
|
| 356 |
+
logger.debug(f" ✓ Trouvé par recherche directe: {nom}")
|
| 357 |
return {
|
| 358 |
"valeur_kg_co2_eq": val,
|
| 359 |
"nom_intrant": nom,
|
|
|
|
| 362 |
"justification": None,
|
| 363 |
}
|
| 364 |
# Faux positif — on continue vers le LLM
|
| 365 |
+
logger.debug(f" ✗ Faux positif détecté pour '{matiere}' → '{nom}'")
|
| 366 |
|
| 367 |
# Tentative avec traduction EN->FR
|
| 368 |
matiere_fr = translate_matiere_to_french(matiere)
|
| 369 |
if matiere_fr.lower() != matiere.lower():
|
| 370 |
+
logger.debug(f" Stratégie 2: Traduction EN→FR '{matiere}' → '{matiere_fr}'")
|
| 371 |
result = data_loader.get_ecoalim_climate_value(matiere_fr, pays_production, pays_transformation)
|
| 372 |
if not result:
|
| 373 |
result = data_loader.get_ecoalim_climate_value(matiere_fr)
|
| 374 |
if result:
|
| 375 |
val, nom, source = result
|
| 376 |
if data_loader.is_name_match(matiere_fr, nom):
|
| 377 |
+
logger.debug(f" ✓ Trouvé par traduction EN→FR: {nom}")
|
| 378 |
return {
|
| 379 |
"valeur_kg_co2_eq": val,
|
| 380 |
"nom_intrant": nom,
|
|
|
|
| 382 |
"match_exact": False,
|
| 383 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}'",
|
| 384 |
}
|
| 385 |
+
logger.debug(f" ✗ Faux positif après traduction EN→FR")
|
| 386 |
|
| 387 |
# Tentative via LLM
|
| 388 |
+
logger.debug(f" Stratégie 3: Recherche via LLM pour '{matiere}'")
|
| 389 |
match_info = find_matching_name_in_db(matiere, "ECOALIM")
|
| 390 |
if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
|
| 391 |
matched_name = match_info["matched_name"]
|
| 392 |
+
logger.debug(f" LLM a suggéré: '{matched_name}'")
|
| 393 |
result = data_loader.get_ecoalim_climate_value(matched_name, pays_production, pays_transformation)
|
| 394 |
if not result:
|
| 395 |
result = data_loader.get_ecoalim_climate_value(matched_name)
|
| 396 |
if result:
|
| 397 |
val, nom, source = result
|
| 398 |
justif = justify_alternative_value(matiere, nom, val, source)
|
| 399 |
+
logger.debug(f" ✓ Trouvé via LLM: {nom}")
|
| 400 |
return {
|
| 401 |
"valeur_kg_co2_eq": val,
|
| 402 |
"nom_intrant": nom,
|
|
|
|
| 405 |
"justification": justif,
|
| 406 |
"llm_match_info": match_info,
|
| 407 |
}
|
| 408 |
+
logger.debug(f" ✗ Suggestion LLM '{matched_name}' non trouvée dans ECOALIM{pays_str}")
|
| 409 |
+
|
| 410 |
+
logger.debug(f" ✗ Aucune correspondance trouvée pour '{matiere}'{pays_str}")
|
| 411 |
return None
|
| 412 |
|
| 413 |
|
|
|
|
| 419 |
Recherche intelligente dans GFLI : d'abord recherche directe,
|
| 420 |
puis traduction FR→EN, puis si pas de résultat, utilise le LLM pour trouver un match.
|
| 421 |
"""
|
| 422 |
+
country_str = f" (pays: {country_iso})" if country_iso else ""
|
| 423 |
+
logger.debug(f"smart_search_gfli: Recherche '{matiere}'{country_str}")
|
| 424 |
+
|
| 425 |
# Tentative directe
|
| 426 |
+
logger.debug(f" Stratégie 1: Recherche directe pour '{matiere}'")
|
| 427 |
result = data_loader.get_gfli_climate_value(matiere, country_iso)
|
| 428 |
if result:
|
| 429 |
val, nom, source = result
|
| 430 |
# Valider que ce n'est pas un faux positif (ex: "blé" → "blend")
|
| 431 |
if data_loader.is_name_match(matiere, nom):
|
| 432 |
+
logger.debug(f" ✓ Trouvé par recherche directe: {nom}")
|
| 433 |
return {
|
| 434 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 435 |
"nom_intrant": nom,
|
|
|
|
| 438 |
"justification": None,
|
| 439 |
}
|
| 440 |
# Faux positif — on continue vers la traduction / LLM
|
| 441 |
+
logger.debug(f" ✗ Faux positif détecté pour '{matiere}' → '{nom}'")
|
| 442 |
|
| 443 |
# Tentative avec traduction FR→EN
|
| 444 |
matiere_en = translate_matiere_to_english(matiere)
|
| 445 |
if matiere_en.lower() != matiere.lower():
|
| 446 |
+
logger.debug(f" Stratégie 2: Traduction FR→EN '{matiere}' → '{matiere_en}'")
|
| 447 |
result = data_loader.get_gfli_climate_value(matiere_en, country_iso)
|
| 448 |
if result:
|
| 449 |
val, nom, source = result
|
| 450 |
# Traduction nécessaire → pas un match exact
|
| 451 |
if data_loader.is_name_match(matiere_en, nom):
|
| 452 |
+
logger.debug(f" ✓ Trouvé par traduction FR→EN: {nom}")
|
| 453 |
return {
|
| 454 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 455 |
"nom_intrant": nom,
|
|
|
|
| 457 |
"match_exact": False,
|
| 458 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_en}'",
|
| 459 |
}
|
| 460 |
+
logger.debug(f" ✗ Faux positif après traduction FR→EN")
|
| 461 |
+
|
| 462 |
+
# Stratégie 2.5: Recherche via synonymes Feedipedia
|
| 463 |
+
logger.debug(f" Stratégie 2.5: Recherche via synonymes Feedipedia")
|
| 464 |
+
synonyms = synonym_service.get_all_synonyms(matiere)
|
| 465 |
+
if synonyms:
|
| 466 |
+
logger.debug(f" Synonymes trouvés ({len(synonyms)}): {', '.join(synonyms[:5])}{'...' if len(synonyms) > 5 else ''}")
|
| 467 |
+
for synonym in synonyms:
|
| 468 |
+
# Skip if we already tried this exact term
|
| 469 |
+
if synonym.lower() == matiere.lower():
|
| 470 |
+
continue
|
| 471 |
+
if matiere_en and synonym.lower() == matiere_en.lower():
|
| 472 |
+
continue
|
| 473 |
+
|
| 474 |
+
logger.debug(f" Essai du synonyme '{synonym}'")
|
| 475 |
+
result = data_loader.get_gfli_climate_value(synonym, country_iso)
|
| 476 |
+
if result:
|
| 477 |
+
val, nom, source = result
|
| 478 |
+
if data_loader.is_name_match(synonym, nom):
|
| 479 |
+
logger.debug(f" ✓ Trouvé via synonyme Feedipedia '{synonym}': {nom}")
|
| 480 |
+
return {
|
| 481 |
+
"valeur_kg_co2_eq_par_tonne": val,
|
| 482 |
+
"nom_intrant": nom,
|
| 483 |
+
"source": source,
|
| 484 |
+
"match_exact": False,
|
| 485 |
+
"justification": f"Synonyme Feedipedia : '{matiere}' → '{synonym}'",
|
| 486 |
+
}
|
| 487 |
+
logger.debug(f" ✗ Aucun synonyme Feedipedia n'a donné de résultat")
|
| 488 |
+
else:
|
| 489 |
+
logger.debug(f" ✗ Aucun synonyme trouvé dans Feedipedia pour '{matiere}'")
|
| 490 |
|
| 491 |
# Tentative avec traduction EN->FR puis FR->EN (double sens)
|
| 492 |
matiere_fr = translate_matiere_to_french(matiere)
|
| 493 |
if matiere_fr.lower() != matiere.lower():
|
| 494 |
matiere_en2 = translate_matiere_to_english(matiere_fr)
|
| 495 |
if matiere_en2.lower() != matiere.lower() and matiere_en2.lower() != matiere_en.lower():
|
| 496 |
+
logger.debug(f" Stratégie 3: Traduction bidirectionnelle '{matiere}' → '{matiere_fr}' → '{matiere_en2}'")
|
| 497 |
result = data_loader.get_gfli_climate_value(matiere_en2, country_iso)
|
| 498 |
if result:
|
| 499 |
val, nom, source = result
|
| 500 |
if data_loader.is_name_match(matiere_en2, nom):
|
| 501 |
+
logger.debug(f" ✓ Trouvé par traduction bidirectionnelle: {nom}")
|
| 502 |
return {
|
| 503 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 504 |
"nom_intrant": nom,
|
|
|
|
| 506 |
"match_exact": False,
|
| 507 |
"justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}' → '{matiere_en2}'",
|
| 508 |
}
|
| 509 |
+
logger.debug(f" ✗ Faux positif après traduction bidirectionnelle")
|
| 510 |
|
| 511 |
# Tentative via LLM
|
| 512 |
+
logger.debug(f" Stratégie 4: Recherche via LLM pour '{matiere}'")
|
| 513 |
match_info = find_matching_name_in_db(matiere, "GFLI")
|
| 514 |
if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
|
| 515 |
matched_name = match_info["matched_name"]
|
| 516 |
+
logger.debug(f" LLM a suggéré: '{matched_name}'")
|
| 517 |
result = data_loader.get_gfli_climate_value(matched_name, country_iso)
|
| 518 |
if result:
|
| 519 |
val, nom, source = result
|
| 520 |
justif = justify_alternative_value(matiere, nom, val, source)
|
| 521 |
+
logger.debug(f" ✓ Trouvé via LLM: {nom}")
|
| 522 |
return {
|
| 523 |
"valeur_kg_co2_eq_par_tonne": val,
|
| 524 |
"nom_intrant": nom,
|
|
|
|
| 527 |
"justification": justif,
|
| 528 |
"llm_match_info": match_info,
|
| 529 |
}
|
| 530 |
+
logger.debug(f" ✗ Suggestion LLM '{matched_name}' non trouvée dans GFLI{country_str}")
|
| 531 |
+
|
| 532 |
+
logger.debug(f" ✗ Aucune correspondance trouvée pour '{matiere}'{country_str}")
|
| 533 |
return None
|
| 534 |
|
| 535 |
|
src/synonym_service.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
synonym_service.py - Feedipedia-based synonym lookup for feed materials.
|
| 3 |
+
|
| 4 |
+
Provides intelligent material name resolution using the feedipedia database:
|
| 5 |
+
- 918 feed entries
|
| 6 |
+
- 1,973 French synonyms
|
| 7 |
+
- 4,742 English synonyms
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
from synonym_service import get_all_synonyms
|
| 11 |
+
|
| 12 |
+
synonyms = get_all_synonyms("luzerne")
|
| 13 |
+
# Returns: ["alfalfa", "lucerne", "luzerne", "luzerne cultivée"]
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import logging
|
| 18 |
+
import os
|
| 19 |
+
import unicodedata
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from typing import Dict, List, Set, Optional
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
# Module-level cache for loaded synonyms
|
| 26 |
+
_synonym_cache: Optional[Dict[str, List[str]]] = None
|
| 27 |
+
_cache_loaded = False
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _normalize_text(text: str) -> str:
    """
    Return a canonical lookup form of ``text``.

    The value is lowercased, stripped of combining accent marks and has
    every run of whitespace collapsed to a single space (leading and
    trailing whitespace disappears as a side effect). Punctuation such as
    hyphens or parentheses is left untouched.

    Args:
        text: Raw term; any falsy value (``None``, ``""``) yields ``""``.

    Returns:
        The normalized string.

    Examples:
        "Luzerne"  → "luzerne"
        "MAÏS"     → "mais"
        "Blé noir" → "ble noir"
    """
    if not text:
        return ""

    # NFD splits an accented letter into base letter + combining mark
    # (é → e + ´); the marks all belong to Unicode category "Mn".
    decomposed = unicodedata.normalize('NFD', text.lower())
    base_chars = ''.join(
        ch for ch in decomposed if unicodedata.category(ch) != 'Mn'
    )

    # split() without arguments drops leading/trailing whitespace and
    # treats any whitespace run as a single separator.
    return ' '.join(base_chars.split())
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _collect_feed_names(feed: dict) -> List[str]:
    """Return the cleaned, de-duplicated names (main, English, French) of one feed entry."""
    candidates: list = []

    # "feed_name" usually looks like "Material (Scientific name)":
    # keep only the part before the first parenthesis.
    feed_name = feed.get("feed_name")
    if isinstance(feed_name, str):
        candidates.append(feed_name.split('(')[0])

    for key in ("english_names", "french_names"):
        values = feed.get(key)
        if isinstance(values, str):
            # Tolerate a single name stored as a bare string.
            candidates.append(values)
        elif isinstance(values, (list, tuple)):
            candidates.extend(values)
        # Anything else (missing key, null, number, ...) is ignored.

    names: List[str] = []
    seen = set()
    for candidate in candidates:
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned and cleaned not in seen:
            seen.add(cleaned)
            names.append(cleaned)
    return names


def _load_feedipedia() -> Dict[str, List[str]]:
    """
    Load the feedipedia JSON file and build bidirectional synonym mappings.

    The file is read from ``data/feedipedia_feeds.json`` next to this module.
    Every name of a feed (main name, English names, French names) is mapped,
    through its normalized form, to the full list of names of that feed. When
    several feeds share a normalized name their name lists are merged.

    Loading is best-effort: a missing file, an unreadable/invalid JSON file or
    an unexpected top-level structure is logged and yields an empty mapping.
    Malformed individual entries (non-dict feeds, null name lists, non-string
    names) are skipped instead of aborting the whole load.

    Returns:
        Dict mapping normalized terms to the sorted list of original
        (whitespace-trimmed) names, e.g.
        {"luzerne": ["alfalfa", "lucerne", "luzerne", "luzerne cultivée"]}
    """
    feedipedia_path = Path(__file__).parent / "data" / "feedipedia_feeds.json"

    if not feedipedia_path.exists():
        logger.error("Feedipedia file not found at %s", feedipedia_path)
        return {}

    logger.info("Loading feedipedia synonyms from %s", feedipedia_path)

    try:
        with open(feedipedia_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.error("Failed to load feedipedia: %s", e)
        return {}

    if not isinstance(data, dict):
        logger.error(
            "Failed to load feedipedia: expected a JSON object at top level, got %s",
            type(data).__name__,
        )
        return {}

    # normalized_term -> set of all original names of the matching feed(s)
    synonym_map: Dict[str, Set[str]] = {}

    feed_count = 0
    for feeds in data.values():
        if not isinstance(feeds, (list, tuple)):
            continue
        for feed in feeds:
            if not isinstance(feed, dict):
                continue
            feed_count += 1

            names = _collect_feed_names(feed)
            if not names:
                continue

            # Each name's normalized form points to ALL names (itself included).
            for name in names:
                normalized = _normalize_text(name)
                if normalized:
                    synonym_map.setdefault(normalized, set()).update(names)

    logger.info(
        "Loaded %d feeds with %d unique normalized terms",
        feed_count,
        len(synonym_map),
    )

    # Sorted lists give a deterministic ordering to callers.
    return {key: sorted(values) for key, values in synonym_map.items()}
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def _get_cache() -> Dict[str, List[str]]:
    """
    Return the synonym mapping, loading it on the first call.

    The result of ``_load_feedipedia()`` is stored in the module-level
    ``_synonym_cache`` and ``_cache_loaded`` is set so that later calls do
    not load again. An empty dict is returned when nothing is cached.
    """
    global _synonym_cache, _cache_loaded

    if _cache_loaded:
        return _synonym_cache if _synonym_cache else {}

    logger.debug("Initializing synonym cache (first use)")
    _synonym_cache = _load_feedipedia()
    _cache_loaded = True

    return _synonym_cache if _synonym_cache else {}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def get_all_synonyms(term: str) -> List[str]:
    """
    Get all known names (French and English mixed) for a given term.

    The term is normalized with ``_normalize_text`` (case, accents and
    whitespace are ignored) and looked up in the synonym cache.

    Args:
        term: Material name in any language (e.g., "luzerne", "Lucerne", "ALFALFA")

    Returns:
        A new list with every name recorded for the matching feed(s), in the
        spelling stored in the cache (so the spelling of ``term`` itself is
        not necessarily present). The list is a copy: callers may modify it
        without affecting later lookups.
        Returns an empty list if ``term`` is empty or unknown.

    Examples (illustrative — actual content depends on the data file):
        get_all_synonyms("luzerne")
            → ["alfalfa", "lucerne", "luzerne", "luzerne cultivée"]
        get_all_synonyms("unknown_material")
            → []
    """
    if not term:
        return []

    cache = _get_cache()
    normalized = _normalize_text(term)

    # Copy the cached list so that a caller mutating the result cannot
    # corrupt the module-level cache shared by every other lookup.
    synonyms = list(cache.get(normalized, ()))

    if synonyms:
        logger.debug(
            "Found %d synonyms for '%s': %s...", len(synonyms), term, synonyms[:5]
        )
    else:
        logger.debug("No synonyms found for '%s'", term)

    return synonyms
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def get_english_synonyms(french_term: str) -> List[str]:
    """
    Look up the synonyms of a French term.

    Convenience wrapper around ``get_all_synonyms()``: no language filtering
    is performed, so the returned list contains whatever that function
    returns (French and English names alike). Callers that need English
    names only must filter the result themselves.

    Args:
        french_term: French material name (e.g., "luzerne")

    Returns:
        The list produced by ``get_all_synonyms(french_term)``.
    """
    # No language heuristic is applied here; the full synonym list is
    # handed back unchanged.
    return get_all_synonyms(french_term)
|
| 204 |
+
|
| 205 |
+
|
| 206 |
+
def get_french_synonyms(english_term: str) -> List[str]:
    """
    Look up the synonyms of an English term.

    Convenience wrapper around ``get_all_synonyms()``: the result is not
    filtered by language, so callers that need French names only must
    filter it themselves.

    Args:
        english_term: English material name (e.g., "alfalfa")

    Returns:
        The list produced by ``get_all_synonyms(english_term)``.
    """
    # The full synonym list is returned as-is.
    return get_all_synonyms(english_term)
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
def get_cache_stats() -> Dict[str, int]:
    """
    Summarize the content of the synonym cache.

    Calling this function goes through ``_get_cache()``, which loads the
    cache if it has not been loaded yet.

    Returns:
        Dict with three entries:
        - "total_normalized_terms": number of keys in the cache
        - "total_synonym_entries": summed length of all synonym lists
        - "cache_loaded": current value of the module-level loaded flag
    """
    cache = _get_cache()

    stats = {}
    stats["total_normalized_terms"] = len(cache)
    stats["total_synonym_entries"] = sum(map(len, cache.values()))
    stats["cache_loaded"] = _cache_loaded
    return stats
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def clear_cache():
    """Reset the module-level synonym cache so the next lookup reloads it (useful for testing)."""
    global _synonym_cache, _cache_loaded
    _synonym_cache, _cache_loaded = None, False
    logger.debug("Synonym cache cleared")
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
# For debugging: print cache stats when module loads in debug mode
|
| 250 |
+
# Manual smoke test: runs only when this file is executed as a script.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)

    print("Testing synonym service...")
    print()

    # A few French/English names to look up.
    samples = ["luzerne", "alfalfa", "lucerne", "blé", "wheat", "orge", "barley", "maïs"]

    for sample in samples:
        found = get_all_synonyms(sample)
        # Show at most the first five synonyms of each term.
        print(f"{sample:20} → {len(found):3} synonyms: {', '.join(found[:5])}")

    print()
    print("Cache stats:", get_cache_stats())
|