Spaces:

CCPA-GAIA
/

GAIA26CCPA

Sleeping

App Files Community

JosephMcDonnell committed on Feb 23

Commit

a89d575

1 Parent(s): 3bafa5f

test rebase (#6)

Browse files

- new features (0946dfaf6824efaca5c7a3bd7905bdac28b5ff0e)
- wip (4924e8ef49ecb74c90b127eb2661e274ce074658)

Files changed (5) hide show

src/app.py +263 -66
src/config.py +2 -2
src/data_loader.py +45 -15
src/flowchart_engine.py +477 -67
src/llm_service.py +341 -7

src/app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import streamlit as st
 import pandas as pd
 from flowchart_engine import evaluate_carbon_impact, CarbonResult
 import data_loader
 import config
@@ -41,12 +42,6 @@ def get_country_list():
     noms = sorted(set(k.title() for k in config.PAYS_FR_TO_ISO.keys()))
     return noms
-@st.cache_data
-def get_matiere_list():
-    """Retourne la liste des matières premières disponibles dans EcoALIM."""
-    return data_loader.get_ecoalim_matieres()
 # ============================================================================
 # Composant autocomplete maison
 # ============================================================================
@@ -94,12 +89,11 @@ col_form, col_info = st.columns([2, 1])
 with col_form:
     st.subheader("📝 Formulaire de saisie")
-    matiere = autocomplete_input(
         "Nom de la matière première",
-        get_matiere_list(),
         key="input_matiere",
         placeholder="Ex : BLE, T.TNSL DEC., ORGE, T. COLZA, LUZERNE…",
-        help_text="Entrez le nom usuel de la matière première. Des suggestions apparaîtront.",
     )
     provenance_connue = st.radio(
@@ -163,8 +157,130 @@ if run_button:
             pays_transformation=pays_transfo,
         )
     st.divider()
     # ------------------------------------------------------------------
     # Section 1 : Résultat principal
     # ------------------------------------------------------------------
@@ -177,30 +293,17 @@ if run_button:
         with col1:
             if result.impact_kg_co2_eq is not None:
-                if "tonne" in result.unite_source:
-                    # GFLI : valeur en kg CO2 / tonne
-                    st.metric(
-                        label="Impact carbone",
-                        value=f"{result.impact_kg_co2_eq:.2f}",
-                        delta=f"kg CO2 eq / tonne",
-                    )
-                    st.metric(
-                        label="Soit en tonnes CO2 eq / tonne produit",
-                        value=f"{result.impact_tonne_co2_eq:.4f}",
-                        delta="t CO2 eq / t produit",
-                    )
                 else:
-                    # EcoALIM : valeur en kg CO2 / kg
-                    st.metric(
-                        label="Impact carbone",
-                        value=f"{result.impact_kg_co2_eq:.4f}",
-                        delta=f"kg CO2 eq / kg",
-                    )
-                    st.metric(
-                        label="Soit en tonnes CO2 eq / tonne produit",
-                        value=f"{result.impact_kg_co2_eq:.4f}",
-                        delta="t CO2 eq / t produit (même valeur numérique)",
-                    )
         with col2:
             st.markdown(f"**Source :** {result.source_db}")
@@ -215,6 +318,103 @@ if run_button:
             if result.pays_transformation:
                 st.markdown(f"**Pays transformation :** {result.pays_transformation}")
     # ------------------------------------------------------------------
     # Section 2 : Parcours de logique (logigramme)
     # ------------------------------------------------------------------
@@ -234,11 +434,28 @@ if run_button:
     # ------------------------------------------------------------------
     st.subheader("🔍 Détail des recherches effectuées")
     for action in result.actions_appliquees:
-        if action.startswith("   →"):
-            st.success(action)
         else:
-            st.markdown(f"- {action}")
     # ------------------------------------------------------------------
     # Section 4 : Justification si valeur alternative
@@ -248,35 +465,7 @@ if run_button:
         st.info(result.justification_alternative)
     # ------------------------------------------------------------------
-    # Section 5 : Candidats alternatifs
-    # ------------------------------------------------------------------
-    if result.candidats_alternatifs:
-        st.subheader("📋 Produits candidats (triés par pertinence)")
-        if not result.match_exact:
-            st.warning("⚠️ Pas de correspondance exacte — voici les produits les plus proches avec leur impact carbone.")
-        else:
-            st.info("ℹ️ Autres produits correspondant à la recherche dans les bases de données.")
-        # Construire un DataFrame pour un affichage clair
-        df_candidates = pd.DataFrame(result.candidats_alternatifs)
-        df_candidates = df_candidates.rename(columns={
-            "nom": "Intrant",
-            "impact": "Impact carbone",
-            "unite": "Unité",
-            "source": "Base",
-        })
-        # Formater les colonnes
-        st.dataframe(
-            df_candidates,
-            use_container_width=True,
-            hide_index=True,
-            column_config={
-                "Impact carbone": st.column_config.NumberColumn(format="%.4f"),
-            },
-        )
-    # ------------------------------------------------------------------
-    # Section 6 : Classification détaillée
     # ------------------------------------------------------------------
     with st.expander("📋 Détail de la classification brut/transformé"):
         st.markdown(f"**Classification :** {result.classification}")
@@ -334,8 +523,16 @@ if uploaded_file is not None:
                         "Pays production": pays_p or "",
                         "Pays transformation": pays_t or "",
                         "Classification": res.classification,
-                        "Impact (kg CO2 eq)": res.impact_kg_co2_eq,
-                        "Unité": res.unite_source,
                         "Source": res.source_db,
                         "Intrant utilisé": res.intrant_utilise,
                         "Match exact": "Oui" if res.match_exact else "Non",

 import pandas as pd
 from flowchart_engine import evaluate_carbon_impact, CarbonResult
+import llm_service
 import data_loader
 import config
     noms = sorted(set(k.title() for k in config.PAYS_FR_TO_ISO.keys()))
     return noms
 # ============================================================================
 # Composant autocomplete maison
 # ============================================================================
 with col_form:
     st.subheader("📝 Formulaire de saisie")
+    matiere = st.text_input(
         "Nom de la matière première",
         key="input_matiere",
         placeholder="Ex : BLE, T.TNSL DEC., ORGE, T. COLZA, LUZERNE…",
+        help="Entrez le nom usuel de la matière première.",
     )
     provenance_connue = st.radio(
             pays_transformation=pays_transfo,
         )
+    # Stocker le résultat et la matière dans session_state pour persistance
+    st.session_state["last_result"] = result
+    st.session_state["last_matiere"] = matiere.strip()
+    # Nettoyer les anciennes alternatives manuelles
+    st.session_state.pop("searched_alternatives", None)
+# ============================================================================
+# Affichage des résultats (depuis session_state — persiste entre reruns)
+# ============================================================================
+if "last_result" in st.session_state:
+    result = st.session_state["last_result"]
     st.divider()
+    # ------------------------------------------------------------------
+    # Section 0 : Produits candidats
+    # ------------------------------------------------------------------
+    if result.candidats_alternatifs:
+        st.subheader("📋 Produits candidats")
+        if not result.match_exact:
+            st.warning("⚠️ Pas de correspondance exacte — choisissez un produit proche si besoin.")
+        else:
+            st.info("ℹ️ Autres produits correspondant à la recherche.")
+        if result.candidats_reflexion:
+            st.markdown("**Avis du LLM :**")
+            if result.candidat_recommande:
+                st.markdown(f"Meilleur candidat proposé : **{result.candidat_recommande}**")
+            st.info(result.candidats_reflexion)
+        # En-têtes
+        head = st.columns([6, 3, 2])
+        head[0].markdown("**Intrant**")
+        head[1].markdown("**Impact (kg CO2 eq / t)**")
+        head[2].markdown("**Base**")
+        for i, cand in enumerate(result.candidats_alternatifs):
+            nom = cand.get("nom", "")
+            impact = cand.get("impact", 0)
+            unite = str(cand.get("unite", ""))
+            source = cand.get("source", "")
+            source_upper = source.upper()
+            is_gfli = "GFLI" in source_upper
+            if "tonne" in unite or is_gfli:
+                impact_kg_t = impact
+            else:
+                # EcoALIM : kg/kg -> kg/t (x1000)
+                impact_kg_t = impact * 1000.0
+            row = st.columns([6, 3, 2])
+            row[0].markdown(nom)
+            row[1].markdown(f"{impact_kg_t:.2f}")
+            row[2].markdown(source if source else "—")
+        st.divider()
+    # Section 0b : 4 alternatives (fallback)
+    # ------------------------------------------------------------------
+    if result.alternatives_combined or result.alternatives_itinerary:
+        st.subheader("🎯 4 Alternatives proposées (absence de correspondance)")
+        st.info("Quand aucune matière exacte n'est trouvée, voici 4 propositions pour substitution :")
+        # Créer 4 colonnes
+        col1, col2, col3, col4 = st.columns(4)
+        # Alternative 1: ITINERARY
+        with col1:
+            if result.alternatives_itinerary:
+                alt = result.alternatives_itinerary
+                st.markdown("### 🔄 Itinéraire")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 🔄 Itinéraire")
+                st.caption("Non disponible")
+        # Alternative 2: LOCALITY
+        with col2:
+            if result.alternatives_locality:
+                alt = result.alternatives_locality
+                st.markdown("### 📍 Localité")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 📍 Localité")
+                st.caption("Non disponible")
+        # Alternative 3: FORM
+        with col3:
+            if result.alternatives_form:
+                alt = result.alternatives_form
+                st.markdown("### 🌱 Forme structurelle")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 🌱 Forme structurelle")
+                st.caption("Non disponible")
+        # Alternative 4: COMBINED
+        with col4:
+            if result.alternatives_combined:
+                alt = result.alternatives_combined
+                st.markdown("### ✨ Meilleur compromis")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}", delta="RECOMMANDÉ ✓")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### ✨ Meilleur compromis")
+                st.caption("Non disponible")
+        st.divider()
     # ------------------------------------------------------------------
     # Section 1 : Résultat principal
     # ------------------------------------------------------------------
         with col1:
             if result.impact_kg_co2_eq is not None:
+                # GFLI : kg CO2 eq / t ; EcoALIM : kg/kg -> kg/t
+                if "tonne" in (result.unite_source or ""):
+                    impact_kg_t = result.impact_kg_co2_eq
                 else:
+                    impact_kg_t = result.impact_kg_co2_eq * 1000.0
+                st.metric(
+                    label="Impact carbone",
+                    value=f"{impact_kg_t:.2f}",
+                    delta="kg CO2 eq / t produit",
+                )
         with col2:
             st.markdown(f"**Source :** {result.source_db}")
             if result.pays_transformation:
                 st.markdown(f"**Pays transformation :** {result.pays_transformation}")
+    # ------------------------------------------------------------------
+    # Section 0c : Bouton "Chercher une alternative" si match non exact
+    # ------------------------------------------------------------------
+    if not result.match_exact and result.impact_kg_co2_eq is not None:
+        st.divider()
+        st.info("💡 La correspondance n'est pas exacte. Vous pouvez chercher d'autres alternatives.")
+        col1, col2, col3 = st.columns([1, 2, 1])
+        with col2:
+            if st.button("🔍 Chercher une alternative plus proche", use_container_width=True, key="btn_find_alternative"):
+                matiere_search = st.session_state.get("last_matiere", "")
+                with st.spinner("Recherche des 4 alternatives en cours..."):
+                    # Déterminer la base GFLI ou EcoALIM selon le source_db
+                    db_name = "GFLI" if "GFLI" in (result.source_db or "") else "ECOALIM"
+                    # Déterminer le pays_hint si applicable
+                    country_hint = result.pays_production or result.pays_transformation
+                    # Forcer la recherche des alternatives
+                    alternatives = llm_service.find_alternative_materials(
+                        matiere_search,
+                        db_name=db_name,
+                        country_hint=country_hint
+                    )
+                    if alternatives:
+                        st.session_state["searched_alternatives"] = {
+                            "itinerary": alternatives.get("itinerary"),
+                            "locality": alternatives.get("locality"),
+                            "form": alternatives.get("form"),
+                            "combined": alternatives.get("combined"),
+                        }
+                        st.rerun()
+                    else:
+                        st.error("❌ Pas d'alternatives trouvées.")
+    # Afficher les alternatives trouvées via bouton (persistées en session_state)
+    if "searched_alternatives" in st.session_state:
+        st.subheader("🎯 Alternatives recherchées")
+        st.info("Alternatives générées suite à votre demande :")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            alt = st.session_state["searched_alternatives"].get("itinerary")
+            if alt:
+                st.markdown("### 🔄 Itinéraire")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 🔄 Itinéraire")
+                st.caption("Non disponible")
+        with col2:
+            alt = st.session_state["searched_alternatives"].get("locality")
+            if alt:
+                st.markdown("### 📍 Localité")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 📍 Localité")
+                st.caption("Non disponible")
+        with col3:
+            alt = st.session_state["searched_alternatives"].get("form")
+            if alt:
+                st.markdown("### 🌱 Forme structurelle")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### 🌱 Forme structurelle")
+                st.caption("Non disponible")
+        with col4:
+            alt = st.session_state["searched_alternatives"].get("combined")
+            if alt:
+                st.markdown("### ✨ Meilleur compromis")
+                st.markdown(f"**{alt['name']}**")
+                st.metric("Impact", f"{alt['impact']:.2f}", delta="RECOMMANDÉ ✓")
+                st.caption(f"kg CO2 eq/t | Source: {alt['source']}")
+                with st.expander("Raison"):
+                    st.markdown(alt['reasoning'])
+            else:
+                st.markdown("### ✨ Meilleur compromis")
+                st.caption("Non disponible")
+        st.divider()
     # ------------------------------------------------------------------
     # Section 2 : Parcours de logique (logigramme)
     # ------------------------------------------------------------------
     # ------------------------------------------------------------------
     st.subheader("🔍 Détail des recherches effectuées")
+    import re
+    def _format_action_line(line: str) -> str:
+        """Convertit les impacts affiches en kg CO2 eq / t produit."""
+        m = re.search(r"=\s*([0-9]+(?:\.[0-9]+)?)\s*kg\s*CO2\s*eq\s*/\s*t", line)
+        if m:
+            val = float(m.group(1))
+            return re.sub(r"=\s*[0-9]+(?:\.[0-9]+)?\s*kg\s*CO2\s*eq\s*/\s*t",
+                          f"= {val:.2f} kg CO2 eq / t", line)
+        m = re.search(r"=\s*([0-9]+(?:\.[0-9]+)?)\s*kg\s*CO2\s*eq\s*/\s*kg", line)
+        if m:
+            val = float(m.group(1)) * 1000.0
+            return re.sub(r"=\s*[0-9]+(?:\.[0-9]+)?\s*kg\s*CO2\s*eq\s*/\s*kg",
+                          f"= {val:.2f} kg CO2 eq / t", line)
+        return line
     for action in result.actions_appliquees:
+        line = _format_action_line(action)
+        if line.startswith("   →"):
+            st.success(line)
         else:
+            st.markdown(f"- {line}")
     # ------------------------------------------------------------------
     # Section 4 : Justification si valeur alternative
         st.info(result.justification_alternative)
     # ------------------------------------------------------------------
+    # Section 5 : Classification détaillée
     # ------------------------------------------------------------------
     with st.expander("📋 Détail de la classification brut/transformé"):
         st.markdown(f"**Classification :** {result.classification}")
                         "Pays production": pays_p or "",
                         "Pays transformation": pays_t or "",
                         "Classification": res.classification,
+                        "Impact (kg CO2 eq / t)": (
+                            res.impact_kg_co2_eq
+                            if res.impact_kg_co2_eq is None
+                            else (
+                                res.impact_kg_co2_eq
+                                if "tonne" in (res.unite_source or "")
+                                else res.impact_kg_co2_eq * 1000.0
+                            )
+                        ),
+                        "Unité": "kg CO2 eq / t produit",
                         "Source": res.source_db,
                         "Intrant utilisé": res.intrant_utilise,
                         "Match exact": "Oui" if res.match_exact else "Non",

src/config.py CHANGED Viewed

@@ -6,12 +6,11 @@ from dotenv import load_dotenv
 load_dotenv()
-IS_PRODUCTION = bool(os.getenv("IS_PRODUCTION", 0))
 # ---------------------------------------------------------------------------
 # Clé API Mistral
 # ---------------------------------------------------------------------------
 MISTRAL_API_KEY: str = os.getenv("MISTRAL_API_KEY", "")
 # Clé Hugging Face
 HF_KEY = os.getenv("HF_KEY", "")
 # ---------------------------------------------------------------------------
@@ -139,3 +138,4 @@ EUROPEAN_COUNTRIES_FR = {
 # Modèle Mistral à utiliser
 MISTRAL_MODEL = "mistral-small-latest"

 load_dotenv()
 # ---------------------------------------------------------------------------
 # Clé API Mistral
 # ---------------------------------------------------------------------------
 MISTRAL_API_KEY: str = os.getenv("MISTRAL_API_KEY", "")
+IS_PRODUCTION = bool(os.getenv("IS_PRODUCTION", 0))
 # Clé Hugging Face
 HF_KEY = os.getenv("HF_KEY", "")
 # ---------------------------------------------------------------------------
 # Modèle Mistral à utiliser
 MISTRAL_MODEL = "mistral-small-latest"
+MISTRAL_MODEL_POWERFUL = "mistral-large-latest"  # Pour analyses complexes (alternatives, tri)

src/data_loader.py CHANGED Viewed

@@ -4,14 +4,13 @@ data_loader.py - Chargement et indexation des bases de données EcoALIM, GFLI et
 from __future__ import annotations
 import json
-import os
 import re
 from functools import lru_cache
 from typing import Dict, List, Optional, Tuple
 import pandas as pd
 import pdfplumber
-from datasets import load_dataset, DownloadMode
 import config
@@ -54,6 +53,18 @@ def _normalize_for_search(text: str) -> str:
     return ascii_text
 def is_name_match(matiere: str, intrant_name: str) -> bool:
     """
     Vérifie si le nom de la matière est une correspondance réelle (mot entier)
@@ -95,6 +106,12 @@ def search_ecoalim(
     mask_starts = df_norms.str.startswith(matiere_norm, na=False)
     pattern_word = r'\b' + re.escape(matiere_norm) + r'\b'
     mask_word = df_norms.str.contains(pattern_word, na=False, regex=True)
     mask_contains = df_norms.str.contains(re.escape(matiere_norm), na=False)
     # Use best available mask with priority
@@ -102,6 +119,8 @@ def search_ecoalim(
         mask = mask_starts
     elif mask_word.any():
         mask = mask_word
     elif mask_contains.any():
         mask = mask_contains
     else:
@@ -130,10 +149,14 @@ def search_ecoalim(
     # Sort by relevance: entries starting with the search term come first
     if not result.empty:
         result_norms = result[nom_col].apply(lambda x: _normalize_for_search(str(x)))
-        result["_priority"] = 2
         result.loc[result_norms.str.contains(pattern_word, na=False, regex=True), "_priority"] = 1
         result.loc[result_norms.str.startswith(matiere_norm, na=False), "_priority"] = 0
-        result = result.sort_values("_priority").drop(columns=["_priority"])
     return result
@@ -221,15 +244,21 @@ def search_gfli(
     prod_col = config.GFLI_COL_PRODUCT
     df_norms = df[prod_col].apply(lambda x: _normalize_for_search(str(x)) if pd.notna(x) else "")
-    # Strategy 1: word-boundary match
-    pattern_word = r'\b' + re.escape(matiere_norm) + r'\b'
-    mask = df_norms.str.contains(pattern_word, na=False, regex=True)
-    # Strategy 2: starts-with
     if not mask.any():
-        mask = df_norms.str.startswith(matiere_norm, na=False)
-    # Strategy 3: contains
     if not mask.any():
         mask = df_norms.str.contains(re.escape(matiere_norm), na=False)
@@ -316,7 +345,7 @@ def get_top_ecoalim_candidates(
     matiere: str,
     pays_production: Optional[str] = None,
     pays_transformation: Optional[str] = None,
-    top_n: int = 8,
 ) -> List[Dict]:
     """
     Retourne les top N correspondances EcoALIM triées par pertinence,
@@ -326,7 +355,8 @@ def get_top_ecoalim_candidates(
     if results.empty:
         return []
     candidates = []
-    for _, row in results.head(top_n).iterrows():
         val = row.get(config.ECOALIM_COL_CLIMATE)
         if pd.notna(val):
             candidates.append({
@@ -341,7 +371,7 @@ def get_top_ecoalim_candidates(
 def get_top_gfli_candidates(
     matiere: str,
     country_iso: Optional[str] = None,
-    top_n: int = 8,
 ) -> List[Dict]:
     """
     Retourne les top N correspondances GFLI triées par pertinence,
@@ -351,7 +381,8 @@ def get_top_gfli_candidates(
     if results.empty:
         return []
     candidates = []
-    for _, row in results.head(top_n).iterrows():
         val = row.get(config.GFLI_COL_CLIMATE)
         if pd.notna(val):
             candidates.append({
@@ -392,7 +423,6 @@ def load_pdf_text() -> str:
 def get_pdf_excerpt(max_chars: int = 15000) -> str:
     """Retourne un extrait du PDF CIR (tronqué si nécessaire) pour envoi au LLM."""
     text = load_pdf_text()
     if len(text) > max_chars:
         return text[:max_chars] + "\n... [texte tronqué]"
     return text

 from __future__ import annotations
 import json
 import re
 from functools import lru_cache
 from typing import Dict, List, Optional, Tuple
+from datasets import load_dataset,DownloadMode
 import pandas as pd
 import pdfplumber
 import config
     return ascii_text
+_STOPWORDS_FR = {
+    "de", "du", "des", "la", "le", "les", "d", "l", "a", "au", "aux"
+}
+def _tokens_for_search(text: str) -> list[str]:
+    """Découpe un texte en tokens utiles pour une recherche souple."""
+    text = _normalize_for_search(text)
+    tokens = re.findall(r"[a-z0-9]+", text)
+    return [t for t in tokens if t and t not in _STOPWORDS_FR]
 def is_name_match(matiere: str, intrant_name: str) -> bool:
     """
     Vérifie si le nom de la matière est une correspondance réelle (mot entier)
     mask_starts = df_norms.str.startswith(matiere_norm, na=False)
     pattern_word = r'\b' + re.escape(matiere_norm) + r'\b'
     mask_word = df_norms.str.contains(pattern_word, na=False, regex=True)
+    tokens = _tokens_for_search(matiere_norm)
+    mask_tokens = pd.Series(False, index=df.index)
+    if tokens:
+        mask_tokens = df_norms.apply(
+            lambda x: all(t in _tokens_for_search(x) for t in tokens)
+        )
     mask_contains = df_norms.str.contains(re.escape(matiere_norm), na=False)
     # Use best available mask with priority
         mask = mask_starts
     elif mask_word.any():
         mask = mask_word
+    elif mask_tokens.any():
+        mask = mask_tokens
     elif mask_contains.any():
         mask = mask_contains
     else:
     # Sort by relevance: entries starting with the search term come first
     if not result.empty:
         result_norms = result[nom_col].apply(lambda x: _normalize_for_search(str(x)))
+        result["_priority"] = 3
         result.loc[result_norms.str.contains(pattern_word, na=False, regex=True), "_priority"] = 1
         result.loc[result_norms.str.startswith(matiere_norm, na=False), "_priority"] = 0
+        result.loc[result_norms.apply(lambda x: all(t in _tokens_for_search(x) for t in tokens)), "_priority"] = 2
+        # Prefer OS outputs over champ when ties exist
+        result["_os_priority"] = 1
+        result.loc[result_norms.str.contains("sortie os", na=False), "_os_priority"] = 0
+        result = result.sort_values(["_priority", "_os_priority"]).drop(columns=["_priority", "_os_priority"])
     return result
     prod_col = config.GFLI_COL_PRODUCT
     df_norms = df[prod_col].apply(lambda x: _normalize_for_search(str(x)) if pd.notna(x) else "")
+    # Strategy 1: starts-with
+    mask = df_norms.str.startswith(matiere_norm, na=False)
+    # Strategy 2: word-boundary match
+    if not mask.any():
+        pattern_word = r'\b' + re.escape(matiere_norm) + r'\b'
+        mask = df_norms.str.contains(pattern_word, na=False, regex=True)
+    # Strategy 3: token-subset match (souple)
     if not mask.any():
+        tokens = _tokens_for_search(matiere_norm)
+        if tokens:
+            mask = df_norms.apply(lambda x: all(t in _tokens_for_search(x) for t in tokens))
+    # Strategy 4: contains
     if not mask.any():
         mask = df_norms.str.contains(re.escape(matiere_norm), na=False)
     matiere: str,
     pays_production: Optional[str] = None,
     pays_transformation: Optional[str] = None,
+    top_n: Optional[int] = 8,
 ) -> List[Dict]:
     """
     Retourne les top N correspondances EcoALIM triées par pertinence,
     if results.empty:
         return []
     candidates = []
+    rows = results if top_n is None else results.head(top_n)
+    for _, row in rows.iterrows():
         val = row.get(config.ECOALIM_COL_CLIMATE)
         if pd.notna(val):
             candidates.append({
 def get_top_gfli_candidates(
     matiere: str,
     country_iso: Optional[str] = None,
+    top_n: Optional[int] = 8,
 ) -> List[Dict]:
     """
     Retourne les top N correspondances GFLI triées par pertinence,
     if results.empty:
         return []
     candidates = []
+    rows = results if top_n is None else results.head(top_n)
+    for _, row in rows.iterrows():
         val = row.get(config.GFLI_COL_CLIMATE)
         if pd.notna(val):
             candidates.append({
 def get_pdf_excerpt(max_chars: int = 15000) -> str:
     """Retourne un extrait du PDF CIR (tronqué si nécessaire) pour envoi au LLM."""
     text = load_pdf_text()
     if len(text) > max_chars:
         return text[:max_chars] + "\n... [texte tronqué]"
     return text

src/flowchart_engine.py CHANGED Viewed

@@ -51,6 +51,14 @@ class CarbonResult:
     # Candidats alternatifs (pour affichage comparatif quand match non exact)
     candidats_alternatifs: List[dict] = field(default_factory=list)
     erreur: Optional[str] = None
@@ -172,7 +180,7 @@ def _resolve_node_4(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_worst:
         val, nom, src = eco_worst
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = src
         result.intrant_utilise = nom
@@ -200,6 +208,56 @@ def _resolve_node_4(matiere: str, result: CarbonResult) -> CarbonResult:
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']} = {val:.2f} kg CO2 eq/t")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' dans GFLI ni ECOALIM."
     return result
@@ -278,7 +336,7 @@ def _resolve_node_5(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_worst:
         val, nom, src = eco_worst
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = src
         result.intrant_utilise = nom
@@ -306,6 +364,56 @@ def _resolve_node_5(matiere: str, result: CarbonResult) -> CarbonResult:
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, provenance inconnue)."
     return result
@@ -322,7 +430,7 @@ def _resolve_node_8(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
@@ -350,7 +458,7 @@ def _resolve_node_8(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_smart:
         val = eco_smart["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_smart["source"]
         result.intrant_utilise = eco_smart["nom_intrant"]
@@ -359,6 +467,56 @@ def _resolve_node_8(matiere: str, result: CarbonResult) -> CarbonResult:
         result.actions_appliquees.append(f"   → Via LLM : {eco_smart['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (brut, France)."
     return result
@@ -422,7 +580,7 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
@@ -431,6 +589,56 @@ def _resolve_node_9(matiere: str, pays_production: str, result: CarbonResult) ->
         result.actions_appliquees.append(f"   → Trouvé dans ECOALIM : {eco_result['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (brut, {pays_production})."
     return result
@@ -447,7 +655,7 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
@@ -477,7 +685,7 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
     if eco_smart:
         val = eco_smart["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
-        result.impact_tonne_co2_eq = val / 1000.0
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_smart["source"]
         result.intrant_utilise = eco_smart["nom_intrant"]
@@ -486,6 +694,56 @@ def _resolve_node_10(matiere: str, result: CarbonResult) -> CarbonResult:
         result.actions_appliquees.append(f"   → Via LLM : {eco_smart['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, France/France)."
     return result
@@ -557,6 +815,56 @@ def _resolve_node_11(matiere: str, result: CarbonResult) -> CarbonResult:
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé France, MP brute hors FR)."
     return result
@@ -637,6 +945,56 @@ def _resolve_node_12(matiere: str, pays_transformation: str, result: CarbonResul
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé hors France)."
     return result
@@ -704,7 +1062,7 @@ def evaluate_carbon_impact(
                 answer="Intrant brut/non transformé",
             ))
             result.node_resultat = "node_4"
-            return _resolve_node_4(matiere_premiere, result)
         else:
             result.parcours.append(StepLog(
                 node_id="node_2",
@@ -712,80 +1070,103 @@ def evaluate_carbon_impact(
                 answer="Coproduit/intrant transformé",
             ))
             result.node_resultat = "node_5"
-            return _resolve_node_5(matiere_premiere, result)
-    # Provenance connue
-    result.parcours.append(StepLog(
-        node_id="node_1",
-        question="Connaissez-vous l'endroit où l'intrant a été cultivé ou produit ?",
-        answer=f"Oui — Production: {pays_production}" + (f", Transformation: {pays_transformation}" if pays_transformation else ""),
-    ))
-    if not is_transformed:
-        # Node 3 → Node 6 : où a-t-il été cultivé ?
         result.parcours.append(StepLog(
-            node_id="node_3",
-            question="Quel est le niveau de transformation ?",
-            answer="Intrant brut/non transformé",
         ))
-        if _is_france(pays_production):
-            result.parcours.append(StepLog(
-                node_id="node_6",
-                question="Où l'intrant brut a-t-il été cultivé ?",
-                answer="En France",
-            ))
-            result.node_resultat = "node_8"
-            return _resolve_node_8(matiere_premiere, result)
-        else:
             result.parcours.append(StepLog(
-                node_id="node_6",
-                question="Où l'intrant brut a-t-il été cultivé ?",
-                answer=f"Hors France — {pays_production}",
             ))
-            result.node_resultat = "node_9"
-            return _resolve_node_9(matiere_premiere, pays_production, result)
-    else:
-        # Node 3 → Node 7 : où transformé + origine MP brute ?
-        result.parcours.append(StepLog(
-            node_id="node_3",
-            question="Quel est le niveau de transformation ?",
-            answer="Coproduit/intrant transformé",
-        ))
-        if _is_france(pays_transformation) and _is_france(pays_production):
             result.parcours.append(StepLog(
-                node_id="node_7",
-                question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
-                answer="Transformé en France à partir de MP brute française",
             ))
-            result.node_resultat = "node_10"
-            return _resolve_node_10(matiere_premiere, result)
-        elif _is_france(pays_transformation):
-            result.parcours.append(StepLog(
-                node_id="node_7",
-                question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
-                answer=f"Transformé en France, MP brute de {pays_production or 'origine inconnue'}",
-            ))
-            result.node_resultat = "node_11"
-            return _resolve_node_11(matiere_premiere, result)
         else:
-            result.parcours.append(StepLog(
-                node_id="node_7",
-                question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
-                answer=f"Transformé hors France — {pays_transformation}",
-            ))
-            result.node_resultat = "node_12"
-            result = _resolve_node_12(matiere_premiere, pays_transformation or pays_production or "", result)
     # ------------------------------------------------------------------
     # Post-processing : collecter les candidats alternatifs
     # ------------------------------------------------------------------
     result = _collect_candidates(result)
     # Générer une justification LLM si le match n'est pas exact et qu'il n'y en a pas
     if not result.match_exact and not result.justification_alternative and not result.erreur:
         if result.intrant_utilise and result.impact_kg_co2_eq is not None:
@@ -827,29 +1208,54 @@ def _collect_candidates(result: CarbonResult) -> CarbonResult:
     # Collecter depuis la source utilisée + l'autre source
     # D'abord la source principalement utilisée
     if "ECOALIM" in source.upper():
         candidates.extend(data_loader.get_top_ecoalim_candidates(
             matiere,
             pays_production=result.pays_production,
             pays_transformation=result.pays_transformation,
-            top_n=8,
         ))
         candidates.extend(data_loader.get_top_gfli_candidates(
-            matiere, country_iso=country_iso, top_n=4,
         ))
     else:
         # Essayer aussi avec le nom traduit si on est sur GFLI
         # Le nom d'intrant utilisé contient le terme anglais
         intrant_base = result.intrant_utilise.split(",")[0].split("/")[0].strip()
         candidates.extend(data_loader.get_top_gfli_candidates(
-            intrant_base, country_iso=country_iso, top_n=8,
         ))
         candidates.extend(data_loader.get_top_ecoalim_candidates(
             matiere,
             pays_production=result.pays_production,
             pays_transformation=result.pays_transformation,
-            top_n=4,
         ))
     # Dédupliquer, exclure l'intrant sélectionné, et filtrer les faux positifs
     seen = set()
@@ -867,6 +1273,10 @@ def _collect_candidates(result: CarbonResult) -> CarbonResult:
             # Accepter quand même si ça matche le nom de base de l'intrant validé
             if intrant_base and _is_name_match(intrant_base, c["nom"]):
                 pass  # OK, même famille de produit
             else:
                 continue  # Faux positif
         seen.add(key)

     # Candidats alternatifs (pour affichage comparatif quand match non exact)
     candidats_alternatifs: List[dict] = field(default_factory=list)
+    candidat_recommande: Optional[str] = None
+    candidats_reflexion: Optional[str] = None
+    # 4 propositions d'alternatives (itinerary, locality, form, combined)
+    alternatives_itinerary: Optional[dict] = None
+    alternatives_locality: Optional[dict] = None
+    alternatives_form: Optional[dict] = None
+    alternatives_combined: Optional[dict] = None
     erreur: Optional[str] = None
     if eco_worst:
         val, nom, src = eco_worst
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = src
         result.intrant_utilise = nom
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']} = {val:.2f} kg CO2 eq/t")
         return result
+    # Étape 4 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append("4. Fallback - Recherche via LLM de 4 alternatives")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI")
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' dans GFLI ni ECOALIM."
     return result
     if eco_worst:
         val, nom, src = eco_worst
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = src
         result.intrant_utilise = nom
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
+    # Étape 4 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append("4. Fallback - Recherche via LLM de 4 alternatives (transformée)")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI")
+    if alternatives:
+        # Stocker les 4 alternatives
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, provenance inconnue)."
     return result
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
     if eco_smart:
         val = eco_smart["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_smart["source"]
         result.intrant_utilise = eco_smart["nom_intrant"]
         result.actions_appliquees.append(f"   → Via LLM : {eco_smart['nom_intrant']}")
         return result
+    # Étape 4 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append("4. Fallback - Recherche via LLM de 4 alternatives (France)")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint="France")
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (brut, France)."
     return result
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
         result.actions_appliquees.append(f"   → Trouvé dans ECOALIM : {eco_result['nom_intrant']}")
         return result
+    # Étape 4 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append(f"4. Fallback - Recherche via LLM de 4 alternatives ({pays_production})")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint=pays_production)
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (brut, {pays_production})."
     return result
     if eco_result:
         val = eco_result["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_result["source"]
         result.intrant_utilise = eco_result["nom_intrant"]
     if eco_smart:
         val = eco_smart["valeur_kg_co2_eq"]
         result.impact_kg_co2_eq = val
+        result.impact_tonne_co2_eq = val
         result.unite_source = "kg CO2 eq / kg de produit"
         result.source_db = eco_smart["source"]
         result.intrant_utilise = eco_smart["nom_intrant"]
         result.actions_appliquees.append(f"   → Via LLM : {eco_smart['nom_intrant']}")
         return result
+    # Étape 4 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append("4. Fallback - Recherche via LLM de 4 alternatives (France)")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint="France")
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val
+            result.unite_source = "kg CO2 eq / kg de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.4f} kg CO2 eq/kg")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé, France/France)."
     return result
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
+    # Étape 5 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append("5. Fallback - Recherche via LLM de 4 alternatives (France)")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint="France")
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé France, MP brute hors FR)."
     return result
         result.actions_appliquees.append(f"   → Via LLM : {gfli_smart['nom_intrant']}")
         return result
+    # Étape 5 : Fallback - Proposer des matières alternatives
+    result.actions_appliquees.append(f"5. Fallback - Recherche via LLM de 4 alternatives ({pays_transformation})")
+    alternatives = llm_service.find_alternative_materials(matiere, db_name="GFLI", country_hint=pays_transformation)
+    if alternatives:
+        # Stocker les 4 alternatives dans CarbonResult
+        if alternatives.get("itinerary"):
+            alt = alternatives["itinerary"]
+            result.alternatives_itinerary = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("locality"):
+            alt = alternatives["locality"]
+            result.alternatives_locality = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("form"):
+            alt = alternatives["form"]
+            result.alternatives_form = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+        if alternatives.get("combined"):
+            alt = alternatives["combined"]
+            result.alternatives_combined = {
+                "name": alt["name"],
+                "impact": alt["impact"],
+                "source": alt["source"],
+                "reasoning": alt["reasoning"],
+            }
+            # Utiliser la combined comme valeur principale
+            val = alt["impact"]
+            result.impact_kg_co2_eq = val
+            result.impact_tonne_co2_eq = val / 1000.0
+            result.unite_source = "kg CO2 eq / tonne de produit"
+            result.source_db = alt["source"]
+            result.intrant_utilise = alt["name"]
+            result.match_exact = False
+            result.justification_alternative = alt["reasoning"]
+            result.actions_appliquees.append(f"   → Matière proposée (combo) : {alt['name']} = {val:.2f} kg CO2 eq/t")
+            return result
     result.erreur = f"Aucune valeur trouvée pour '{matiere}' (transformé hors France)."
     return result
                 answer="Intrant brut/non transformé",
             ))
             result.node_resultat = "node_4"
+            result = _resolve_node_4(matiere_premiere, result)
         else:
             result.parcours.append(StepLog(
                 node_id="node_2",
                 answer="Coproduit/intrant transformé",
             ))
             result.node_resultat = "node_5"
+            result = _resolve_node_5(matiere_premiere, result)
+    else:
+        # Provenance connue
         result.parcours.append(StepLog(
+            node_id="node_1",
+            question="Connaissez-vous l'endroit où l'intrant a été cultivé ou produit ?",
+            answer=f"Oui — Production: {pays_production}" + (f", Transformation: {pays_transformation}" if pays_transformation else ""),
         ))
+        if not is_transformed:
+            # Node 3 → Node 6 : où a-t-il été cultivé ?
             result.parcours.append(StepLog(
+                node_id="node_3",
+                question="Quel est le niveau de transformation ?",
+                answer="Intrant brut/non transformé",
             ))
+            if _is_france(pays_production):
+                result.parcours.append(StepLog(
+                    node_id="node_6",
+                    question="Où l'intrant brut a-t-il été cultivé ?",
+                    answer="En France",
+                ))
+                result.node_resultat = "node_8"
+                result = _resolve_node_8(matiere_premiere, result)
+            else:
+                result.parcours.append(StepLog(
+                    node_id="node_6",
+                    question="Où l'intrant brut a-t-il été cultivé ?",
+                    answer=f"Hors France — {pays_production}",
+                ))
+                result.node_resultat = "node_9"
+                result = _resolve_node_9(matiere_premiere, pays_production, result)
+        else:
+            # Node 3 → Node 7 : où transformé + origine MP brute ?
             result.parcours.append(StepLog(
+                node_id="node_3",
+                question="Quel est le niveau de transformation ?",
+                answer="Coproduit/intrant transformé",
             ))
+            if _is_france(pays_transformation) and _is_france(pays_production):
+                result.parcours.append(StepLog(
+                    node_id="node_7",
+                    question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
+                    answer="Transformé en France à partir de MP brute française",
+                ))
+                result.node_resultat = "node_10"
+                result = _resolve_node_10(matiere_premiere, result)
+            elif _is_france(pays_transformation):
+                result.parcours.append(StepLog(
+                    node_id="node_7",
+                    question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
+                    answer=f"Transformé en France, MP brute de {pays_production or 'origine inconnue'}",
+                ))
+                result.node_resultat = "node_11"
+                result = _resolve_node_11(matiere_premiere, result)
+            else:
+                result.parcours.append(StepLog(
+                    node_id="node_7",
+                    question="Où l'intrant a-t-il été transformé et d'où provient la MP brute ?",
+                    answer=f"Transformé hors France — {pays_transformation}",
+                ))
+                result.node_resultat = "node_12"
+                result = _resolve_node_12(matiere_premiere, pays_transformation or pays_production or "", result)
+    # ------------------------------------------------------------------
+    # Post-processing : normaliser les unités (t CO2 eq / t produit)
+    # ------------------------------------------------------------------
+    if result.impact_kg_co2_eq is not None and result.unite_source:
+        if "tonne" in result.unite_source:
+            # GFLI : kg CO2 eq / tonne -> t CO2 eq / t
+            result.impact_tonne_co2_eq = result.impact_kg_co2_eq / 1000.0
         else:
+            # EcoALIM : kg CO2 eq / kg -> t CO2 eq / t (même valeur numérique)
+            result.impact_tonne_co2_eq = result.impact_kg_co2_eq
     # ------------------------------------------------------------------
     # Post-processing : collecter les candidats alternatifs
     # ------------------------------------------------------------------
     result = _collect_candidates(result)
+    # Demander au LLM quel candidat est le plus pertinent en cas de doute
+    if not result.match_exact and result.candidats_alternatifs:
+        try:
+            names = [c.get("nom", "") for c in result.candidats_alternatifs if c.get("nom")]
+            rank = llm_service.rank_candidates(result.matiere_premiere, names)
+            result.candidat_recommande = rank.get("best_name")
+            result.candidats_reflexion = rank.get("reasoning")
+        except Exception:
+            result.candidat_recommande = None
+            result.candidats_reflexion = None
     # Générer une justification LLM si le match n'est pas exact et qu'il n'y en a pas
     if not result.match_exact and not result.justification_alternative and not result.erreur:
         if result.intrant_utilise and result.impact_kg_co2_eq is not None:
     # Collecter depuis la source utilisée + l'autre source
     # D'abord la source principalement utilisée
+    unbounded = not result.match_exact
+    matiere_fr = llm_service.translate_matiere_to_french(matiere)
+    matiere_en = llm_service.translate_matiere_to_english(matiere)
     if "ECOALIM" in source.upper():
         candidates.extend(data_loader.get_top_ecoalim_candidates(
             matiere,
             pays_production=result.pays_production,
             pays_transformation=result.pays_transformation,
+            top_n=None if unbounded else 8,
         ))
+        if matiere_fr.lower() != matiere.lower():
+            candidates.extend(data_loader.get_top_ecoalim_candidates(
+                matiere_fr,
+                pays_production=result.pays_production,
+                pays_transformation=result.pays_transformation,
+                top_n=None if unbounded else 8,
+            ))
         candidates.extend(data_loader.get_top_gfli_candidates(
+            matiere, country_iso=country_iso, top_n=None if unbounded else 4,
         ))
+        if matiere_en.lower() != matiere.lower():
+            candidates.extend(data_loader.get_top_gfli_candidates(
+                matiere_en, country_iso=country_iso, top_n=None if unbounded else 4,
+            ))
     else:
         # Essayer aussi avec le nom traduit si on est sur GFLI
         # Le nom d'intrant utilisé contient le terme anglais
         intrant_base = result.intrant_utilise.split(",")[0].split("/")[0].strip()
         candidates.extend(data_loader.get_top_gfli_candidates(
+            intrant_base, country_iso=country_iso, top_n=None if unbounded else 8,
         ))
+        if matiere_en.lower() != matiere.lower():
+            candidates.extend(data_loader.get_top_gfli_candidates(
+                matiere_en, country_iso=country_iso, top_n=None if unbounded else 8,
+            ))
         candidates.extend(data_loader.get_top_ecoalim_candidates(
             matiere,
             pays_production=result.pays_production,
             pays_transformation=result.pays_transformation,
+            top_n=None if unbounded else 4,
         ))
+        if matiere_fr.lower() != matiere.lower():
+            candidates.extend(data_loader.get_top_ecoalim_candidates(
+                matiere_fr,
+                pays_production=result.pays_production,
+                pays_transformation=result.pays_transformation,
+                top_n=None if unbounded else 4,
+            ))
     # Dédupliquer, exclure l'intrant sélectionné, et filtrer les faux positifs
     seen = set()
             # Accepter quand même si ça matche le nom de base de l'intrant validé
             if intrant_base and _is_name_match(intrant_base, c["nom"]):
                 pass  # OK, même famille de produit
+            elif matiere_en and _is_name_match(matiere_en, c["nom"]):
+                pass  # OK, match en anglais
+            elif matiere_fr and _is_name_match(matiere_fr, c["nom"]):
+                pass  # OK, match en français
             else:
                 continue  # Faux positif
         seen.add(key)

src/llm_service.py CHANGED Viewed

@@ -33,6 +33,20 @@ def _chat(system_prompt: str, user_prompt: str) -> str:
     return response.choices[0].message.content.strip()
 # ============================================================================
 # 1. Déterminer si une matière est brute ou transformée
 # ============================================================================
@@ -147,6 +161,42 @@ Réponds UNIQUEMENT avec la traduction anglaise, rien d'autre."""
         return matiere_name
 def _prefilter_gfli_names(matiere: str, available_names: list) -> list:
     """Pré-filtre les noms GFLI par mots-clés pour réduire la liste envoyée au LLM."""
     # Correspondances FR -> EN pour pré-filtrage
@@ -303,6 +353,23 @@ def smart_search_ecoalim(
             }
         # Faux positif — on continue vers le LLM
     # Tentative via LLM
     match_info = find_matching_name_in_db(matiere, "ECOALIM")
     if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
@@ -353,13 +420,32 @@ def smart_search_gfli(
         result = data_loader.get_gfli_climate_value(matiere_en, country_iso)
         if result:
             val, nom, source = result
-            return {
-                "valeur_kg_co2_eq_par_tonne": val,
-                "nom_intrant": nom,
-                "source": source,
-                "match_exact": True,
-                "justification": f"Traduction automatique : '{matiere}' → '{matiere_en}'",
-            }
     # Tentative via LLM
     match_info = find_matching_name_in_db(matiere, "GFLI")
@@ -381,3 +467,251 @@ def smart_search_gfli(
                 "llm_match_info": match_info,
             }
     return None

     return response.choices[0].message.content.strip()
+def _chat_powerful(system_prompt: str, user_prompt: str, temperature: float = 0.2) -> str:
+    """Send one system/user exchange to the more capable Mistral model.
+    Args:
+        system_prompt: Content of the "system" message.
+        user_prompt: Content of the "user" message.
+        temperature: Sampling temperature forwarded to the API.
+    Returns:
+        The text of the first returned choice, stripped of surrounding whitespace.
+    """
+    conversation = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    completion = _get_client().chat.complete(
+        model=config.MISTRAL_MODEL_POWERFUL,
+        messages=conversation,
+        temperature=temperature,
+        max_tokens=3000,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message.content.strip()
 # ============================================================================
 # 1. Déterminer si une matière est brute ou transformée
 # ============================================================================
         return matiere_name
+def translate_matiere_to_french(matiere_name: str) -> str:
+    """Translate an English raw-material name into French via the LLM.
+    The prompt asks for the technical vocabulary used by animal-feed databases
+    such as EcoALIM.
+    Args:
+        matiere_name: Name to translate.
+    Returns:
+        The model's reply with surrounding whitespace and double quotes removed.
+        ``matiere_name`` is returned unchanged when the LLM call raises or when
+        the cleaned reply is empty, so callers never receive an empty search term.
+    """
+    system_prompt = """Tu es un traducteur expert en alimentation animale.
+Traduis le nom de matière première anglais en français technique utilisé dans les bases de données
+d'alimentation animale (comme EcoALIM).
+Traductions courantes :
+- Wheat grain → Blé
+- Barley grain → Orge
+- Maize/Corn grain → Maïs
+- Sunflower meal → Tourteau de tournesol
+- Rapeseed meal → Tourteau de colza
+- Soybean meal → Tourteau de soja
+- Alfalfa → Luzerne
+- Rapeseed → Colza
+- Sunflower → Tournesol
+- Peas → Pois
+- Faba beans → Féverole
+- Bran → Son
+- Distillers grains → Drèche
+- Pulp → Pulpe
+- Oil → Huile
+- Meal/Flour → Tourteau/Farine
+- Dehulled → Décortiqué
+"""
+    user_prompt = f"""Traduis en français le nom suivant : "{matiere_name}".
+Réponds uniquement par la traduction (pas d'explication)."""
+    try:
+        # The final strip() removes whitespace that was enclosed by the quotes.
+        translated = _chat(system_prompt, user_prompt).strip().strip('"').strip()
+    except Exception:
+        # Best-effort helper: any LLM failure degrades to "no translation".
+        return matiere_name
+    # A blank reply must not become an empty lookup key: callers compare the
+    # result with the input and only search again when the two differ.
+    return translated or matiere_name
 def _prefilter_gfli_names(matiere: str, available_names: list) -> list:
     """Pré-filtre les noms GFLI par mots-clés pour réduire la liste envoyée au LLM."""
     # Correspondances FR -> EN pour pré-filtrage
             }
         # Faux positif — on continue vers le LLM
+    # Tentative avec traduction EN->FR
+    matiere_fr = translate_matiere_to_french(matiere)
+    if matiere_fr.lower() != matiere.lower():
+        result = data_loader.get_ecoalim_climate_value(matiere_fr, pays_production, pays_transformation)
+        if not result:
+            result = data_loader.get_ecoalim_climate_value(matiere_fr)
+        if result:
+            val, nom, source = result
+            if data_loader.is_name_match(matiere_fr, nom):
+                return {
+                    "valeur_kg_co2_eq": val,
+                    "nom_intrant": nom,
+                    "source": source,
+                    "match_exact": False,
+                    "justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}'",
+                }
     # Tentative via LLM
     match_info = find_matching_name_in_db(matiere, "ECOALIM")
     if match_info.get("matched_name") and match_info["matched_name"] != "AUCUN":
         result = data_loader.get_gfli_climate_value(matiere_en, country_iso)
         if result:
             val, nom, source = result
+            # Traduction nécessaire → pas un match exact
+            if data_loader.is_name_match(matiere_en, nom):
+                return {
+                    "valeur_kg_co2_eq_par_tonne": val,
+                    "nom_intrant": nom,
+                    "source": source,
+                    "match_exact": False,
+                    "justification": f"Traduction automatique : '{matiere}' → '{matiere_en}'",
+                }
+    # Tentative avec traduction EN->FR puis FR->EN (double sens)
+    matiere_fr = translate_matiere_to_french(matiere)
+    if matiere_fr.lower() != matiere.lower():
+        matiere_en2 = translate_matiere_to_english(matiere_fr)
+        if matiere_en2.lower() != matiere.lower() and matiere_en2.lower() != matiere_en.lower():
+            result = data_loader.get_gfli_climate_value(matiere_en2, country_iso)
+            if result:
+                val, nom, source = result
+                if data_loader.is_name_match(matiere_en2, nom):
+                    return {
+                        "valeur_kg_co2_eq_par_tonne": val,
+                        "nom_intrant": nom,
+                        "source": source,
+                        "match_exact": False,
+                        "justification": f"Traduction automatique : '{matiere}' → '{matiere_fr}' → '{matiere_en2}'",
+                    }
     # Tentative via LLM
     match_info = find_matching_name_in_db(matiere, "GFLI")
                 "llm_match_info": match_info,
             }
     return None
+def rank_candidates(matiere: str, candidates: list[str]) -> dict:
+    """Ask the LLM which candidate name best matches ``matiere`` and why.
+    Args:
+        matiere: Name of the raw material being looked up.
+        candidates: Candidate names; only the first 40 are sent to the model.
+    Returns:
+        ``{"best_name": str, "reasoning": str}``. Both values are always strings;
+        they are empty when ``candidates`` is empty, when the LLM call fails,
+        when the reply holds no parsable JSON object, or when the model
+        returns ``null`` / a non-string for that field.
+    """
+    import json
+    if not candidates:
+        return {"best_name": "", "reasoning": ""}
+    # Keep the prompt at a reasonable size.
+    max_items = 40
+    truncated = len(candidates) > max_items
+    cand_list = candidates[:max_items]
+    system_prompt = """Tu es un expert en alimentation animale et en ACV.
+Tu dois choisir le candidat le plus pertinent parmi une liste, en tenant compte
+des synonymes et des langues (ex: tournesol = sunflower).
+Réponds UNIQUEMENT au format JSON :
+{"best_name": "...", "reasoning": "..."}
+"""
+    user_prompt = f"""Matière recherchée : "{matiere}"
+Liste de candidats :
+{chr(10).join('- ' + c for c in cand_list)}
+Choisis le meilleur candidat et explique brièvement (2-4 phrases)."""
+    if truncated:
+        user_prompt += "\n\nNote: la liste a été tronquée pour la requête."
+    try:
+        response = _chat(system_prompt, user_prompt)
+        # Keep only the outermost {...} span; the model may wrap it in prose.
+        json_start = response.find("{")
+        json_end = response.rfind("}") + 1
+        parsed = json.loads(response[json_start:json_end])
+        best_name = parsed.get("best_name")
+        reasoning = parsed.get("reasoning")
+    except Exception:
+        # Best-effort ranking: network, parsing or shape errors all mean "no advice".
+        return {"best_name": "", "reasoning": ""}
+    # JSON null or non-string values would otherwise leak through to callers.
+    return {
+        "best_name": best_name if isinstance(best_name, str) else "",
+        "reasoning": reasoning if isinstance(reasoning, str) else "",
+    }
+def find_similar_material(matiere: str, db_name: str = "GFLI") -> Optional[dict]:
+    """Return the "combined" suggestion of ``find_alternative_materials`` in a flat shape.
+    Args:
+        matiere: Name of the raw material that could not be found.
+        db_name: Database name forwarded to ``find_alternative_materials``.
+    Returns:
+        ``{"similar_name", "impact_kg_co2", "source", "reasoning"}`` copied from
+        the "combined" entry, or ``None`` when there is no result or no
+        "combined" entry.
+    """
+    alternatives = find_alternative_materials(matiere, db_name)
+    if not alternatives:
+        return None
+    best = alternatives.get("combined")
+    if not best:
+        return None
+    return {
+        "similar_name": best["name"],
+        "impact_kg_co2": best["impact"],
+        "source": best["source"],
+        "reasoning": best["reasoning"],
+    }
+def _alternatives_catalog_text(products, lookup, scale: int = 1, max_lines: int = 50) -> str:
+    """Format up to ``max_lines`` "- name: impact kg CO2 eq/t" lines for the LLM prompt."""
+    lines = []
+    for prod in products:
+        val_tuple = lookup(prod)
+        if not val_tuple:
+            continue
+        val, nom, _source = val_tuple
+        lines.append(f"- {nom}: {val * scale:.2f} kg CO2 eq/t")
+        # Only the first ``max_lines`` hits are ever shown, so stop looking up more.
+        if len(lines) >= max_lines:
+            break
+    return "\n".join(lines)
+def _build_alternative_entry(entry, lookup) -> Optional[dict]:
+    """Resolve one LLM-proposed ``{"name", "reasoning"}`` entry against the database, or return None."""
+    if not isinstance(entry, dict):
+        return None
+    name = entry.get("name")
+    if not isinstance(name, str) or not name.strip() or name.strip().lower() == "null":
+        return None
+    val_tuple = lookup(name)
+    if not val_tuple:
+        return None
+    val, nom, source = val_tuple
+    return {
+        "name": nom,
+        "impact": val,
+        "source": source,
+        "reasoning": entry.get("reasoning") or "",
+    }
+def find_alternative_materials(matiere: str, db_name: str = "GFLI", country_hint: Optional[str] = None) -> Optional[dict]:
+    """Ask the LLM for four substitutes when a raw material has no exact match.
+    The four criteria are:
+    1. itinerary : same technical itinerary (similar process, comparable impact)
+    2. locality : same production locality/region (or the one given by country_hint)
+    3. form : same structural form
+    4. combined : best reasoned compromise of the three
+    Args:
+        matiere: Name of the material that was not found.
+        db_name: "GFLI" selects the GFLI catalogue; any other value selects EcoALIM.
+        country_hint: Optional country appended to the prompt to steer the
+            locality suggestion.
+    Returns:
+        A dict with the keys "itinerary", "locality", "form" and "combined".
+        Each value is ``{"name", "impact", "source", "reasoning"}`` or ``None``
+        when the model gave no usable name or the name could not be looked up.
+        "impact" is the value returned by the database lookup, unscaled.
+        When "combined" is missing it is filled from "itinerary".
+        Returns ``None`` when no criterion resolved, or when the LLM call or
+        JSON parsing failed.
+    """
+    import json
+    if db_name == "GFLI":
+        lookup = data_loader.get_gfli_climate_value
+        # Catalogue excerpt sent to the model: first 100 products, at most 50 lines.
+        products_text = _alternatives_catalog_text(
+            data_loader.get_gfli_base_products()[:100], lookup,
+        )
+        system_prompt = """Tu es un expert en alimentation animale, biologie végétale, ACV et sourcing de matières premières.
+Une matière première n'a pas été trouvée dans la base GFLI.
+Tu dois proposer 4 alternatives avec des critères différents :
+1. ITINERARY (itinéraire technique) : même processus agricole/industriel, même impact carbone comparable
+   → Même type de culture (céréale, légumineuse, etc.), même irrigation, même type de récolte
+2. LOCALITY (localité) : même région/zone géographique de production (FR, BR, etc.)
+   → Même pays/région, même climat agricole, même disponibilité
+3. FORM (forme structurelle) : MÊME GENRE BOTANIQUE OU TRÈS PROCHE (priorité au genre)
+   → Épautre (Triticum dicoccum) = BLÉS/Wheat (genres Triticum, pas Hordeum/Barley)
+   → Orge (Hordeum vulgare) = rester Orge/Barley
+   → Pois (Pisum) = Pois/Pea, pas Broad beans ou autre légumineuse
+   → Graine générique → propose autres graines du MÊME genre si possible
+   → Légumineuse → autres légumineuses du même genre
+   → RÈGLE D'OR : respecter le genre botanique (Triticum ≠ Hordeum) !
+4. COMBINED (combo réfléchi) : MEILLEUR choix qui combine les 3 critères de manière cohérente
+   → OBLIGATOIRE : doit toujours avoir une réponse
+   → Souvent c'est une alternative qui balance bien itinerary+locality
+   → Si pas de perfect mix, choisir celui avec le meilleur itinerary + proche géographiquement
+Les valeurs en kg CO2 eq/t t'aident à évaluer les impacts.
+⚠️ IMPORTANT :
+- Retourne SEULEMENT les noms qui existent dans la liste
+- combined DOIT TOUJOURS avoir une valeur (ne pas le laisser vide/null)
+- FORM : PRIORITÉ stricte au genre botanique (Triticum→Wheat, Hordeum→Barley, Pisum→Pea, etc.)
+Réponds UNIQUEMENT au format JSON :
+{
+  "itinerary": {"name": "nom exact", "reasoning": "raison technique"},
+  "locality": {"name": "nom exact", "reasoning": "raison géographique"},
+  "form": {"name": "nom exact", "reasoning": "raison structurelle avec même genre botanique"},
+  "combined": {"name": "nom exact", "reasoning": "raison du meilleur compromis"}
+}"""
+        user_prompt = f"""Matière non trouvée : "{matiere}"
+Produits GFLI disponibles :
+{products_text}
+Propose 4 alternatives avec les 4 critères différents.
+⚠️ CRITICAL : Si la matière est épautre/blé (Triticum), propose un WHEAT (genre Triticum), PAS d'orge/barley !
+⚠️ IMPORTANT : combined DOIT TOUJOURS avoir une valeur (jamais null/vide) !"""
+    else:  # EcoALIM
+        lookup = data_loader.get_ecoalim_climate_value
+        # Values are multiplied by 1000 for display only, so this listing carries
+        # the same "kg CO2 eq/t" label as the GFLI one.
+        products_text = _alternatives_catalog_text(
+            data_loader.get_ecoalim_matieres()[:100], lookup, scale=1000,
+        )
+        # NOTE(review): this prompt does not spell out the JSON keys that the
+        # parsing below expects ("itinerary"/"locality"/"form"/"combined" with
+        # "name"/"reasoning") — confirm the model returns them reliably.
+        system_prompt = """Tu es un expert en alimentation animale, ACV et sourcing.
+Une matière première n'a pas été trouvée dans EcoALIM.
+Propose 4 alternatives :
+1. ITINERARY : même itinéraire technique/process
+2. LOCALITY : même provenance géographique
+3. FORM : même catégorie structurelle
+4. COMBINED : meilleur compromis réfléchi
+Réponds UNIQUEMENT au format JSON avec les 4 alternatives."""
+        user_prompt = f"""Matière non trouvée : "{matiere}"
+Produits disponibles :
+{products_text}
+Propose 4 alternatives avec les 4 critères."""
+    if country_hint:
+        user_prompt += f"\n⚠️ LOCALITÉ : Pays spécifié = {country_hint}. Privilégie une alternative produite dans ce pays ou proche (même région)."
+    try:
+        response = _chat_powerful(system_prompt, user_prompt, temperature=0.3)
+        # Keep only the outermost {...} span; the model may wrap it in prose.
+        json_start = response.find("{")
+        json_end = response.rfind("}") + 1
+        parsed = json.loads(response[json_start:json_end])
+        if not isinstance(parsed, dict):
+            return None
+        # Each criterion is resolved independently, so one malformed or null
+        # entry no longer discards the valid ones.
+        result_dict = {
+            criterion: _build_alternative_entry(parsed.get(criterion), lookup)
+            for criterion in ("itinerary", "locality", "form", "combined")
+        }
+    except Exception:
+        # Best-effort suggestion: LLM, parsing or lookup failures mean "no alternatives".
+        return None
+    # Fallback for "combined": reuse the itinerary suggestion when it is missing.
+    itinerary = result_dict["itinerary"]
+    if not result_dict["combined"] and itinerary:
+        result_dict["combined"] = {
+            "name": itinerary["name"],
+            "impact": itinerary["impact"],
+            "source": itinerary["source"],
+            "reasoning": f"Meilleur compromis technique : {itinerary['reasoning']}",
+        }
+    if any(result_dict.values()):
+        return result_dict
+    return None