freesound-popularity-interfaceTestMetadata

Sleeping

App Files Files Community

IKRAMELHADI committed on Feb 9

Commit

bb09077

1 Parent(s): 97483f5

testtest5

Browse files

Files changed (1) hide show

app.py +602 -284

app.py CHANGED Viewed

@@ -1,179 +1,169 @@
 import os
-import re
-import time
 import tempfile
-import joblib
 import numpy as np
 import pandas as pd
 import gradio as gr
-import opensmile
-import xgboost as xgb
 import soundfile as sf
 from pydub import AudioSegment
 import freesound
-# =========================
 # CONFIG
-# =========================
-MIN_EFFECT = 1
-MAX_EFFECT = 30
-MIN_MUSIC = 31
-MAX_MUSIC = 600
 SR_TARGET = 16000
-# Mets ton token FreeSound dans une variable d'environnement :
-# export FREESOUND_API_TOKEN="xxxxx"
-API_TOKEN = os.getenv("FREESOUND_API_TOKEN", "").strip()
-# Modèles openSMILE (les tiens)
-MODEL_EFFECT_PATH = "xgb_model_EffectSound.pkl"
-MODEL_MUSIC_PATH = "xgb_model_Music.pkl"
-MODEL_EFFECT = joblib.load(MODEL_EFFECT_PATH)
-MODEL_MUSIC = joblib.load(MODEL_MUSIC_PATH)
-RATING_DISPLAY_AUDIO = {
-    0: "❌ Informations manquantes",
-    1: "⭐ Faible",
-    2: "⭐⭐ Moyen",
-    3: "⭐⭐⭐ Élevé",
-}
-DOWNLOADS_DISPLAY_AUDIO = {
-    0: "⭐ Faible",
-    1: "⭐⭐ Moyen",
-    2: "⭐⭐⭐ Élevé",
-}
-SMILE = opensmile.Smile(
-    feature_set=opensmile.FeatureSet.eGeMAPSv02,
-    feature_level=opensmile.FeatureLevel.Functionals,
-)
-# =========================
-# UI helpers
-# =========================
 CSS = """
-#header-title { font-size: 28px; font-weight: 800; margin-bottom: 6px; }
-#header-sub { color:#444; margin-top:0; }
-.card {
-  border: 1px solid #e5e7eb; border-radius: 14px; padding: 14px 14px;
-  background: #fff; box-shadow: 0 3px 10px rgba(0,0,0,0.04);
-}
-.badge { display:inline-block; padding:6px 10px; border-radius:999px; font-weight:700; font-size:12px; }
-.badge.music { background:#eef2ff; color:#3730a3; }
-.badge.fx { background:#ecfeff; color:#155e75; }
-.kv { margin:6px 0; }
-.k { font-weight:700; }
-.hint { color:#6b7280; font-size:12px; margin-top:8px; }
-.err { color:#991b1b; font-weight:700; }
 """
-def html_error(title: str, msg: str) -> str:
     return f"""
-<div class="card">
-  <div class="err">❌ {title}</div>
-  <div style="margin-top:8px">{msg}</div>
 </div>
-"""
-def html_result(badge: str, duration: float, rating_text: str, downloads_text: str, extra_html: str = "") -> str:
-    klass = "music" if "Musique" in badge else "fx"
     return f"""
 <div class="card">
-  <div class="badge {klass}">{badge}</div>
-  <div class="kv"><span class="k">Durée :</span> {duration:.2f}s</div>
-  <div class="kv"><span class="k">Rating (classe) :</span> {rating_text}</div>
-  <div class="kv"><span class="k">Downloads (classe) :</span> {downloads_text}</div>
   {extra_html}
 </div>
-"""
 def interpret_results(avg_class: int, dl_class: int) -> str:
     if avg_class == 0:
-        return (
-            "ℹ️ <b>Interprétation</b> :<br>"
-            "Aucune évaluation possible (rating manquant)."
-        )
-    rating_txt = {1: "faible", 2: "moyenne", 3: "élevée"}.get(avg_class, "inconnue")
-    downloads_txt = {0: "faible", 1: "modérée", 2: "élevée"}.get(dl_class, "inconnue")
     if avg_class == 3 and dl_class == 2:
-        potentiel = "très fort"; detail = "contenu de haute qualité et très populaire."
     elif avg_class == 3 and dl_class == 1:
-        potentiel = "fort"; detail = "contenu bien apprécié, en croissance."
     elif avg_class == 3 and dl_class == 0:
-        potentiel = "prometteur"; detail = "bonne qualité mais faible visibilité (peut gagner en popularité)."
     elif avg_class == 2 and dl_class == 2:
-        potentiel = "modéré à fort"; detail = "populaire mais qualité perçue moyenne."
     elif avg_class == 2 and dl_class == 1:
-        potentiel = "modéré"; detail = "profil standard, popularité stable."
     elif avg_class == 2 and dl_class == 0:
-        potentiel = "limité"; detail = "engagement faible, diffusion limitée."
     elif avg_class == 1 and dl_class == 2:
-        potentiel = "contradictoire"; detail = "très téléchargé mais peu apprécié (usage pratique possible)."
     elif avg_class == 1 and dl_class == 1:
-        potentiel = "faible"; detail = "peu attractif pour les utilisateurs."
     else:
-        potentiel = "très faible"; detail = "faible intérêt global."
     return f"<b>Interprétation</b> :<br>Potentiel estimé : <b>{potentiel}</b> — {detail}"
-# =========================
-# FreeSound helpers
-# =========================
-def extract_freesound_id(url: str) -> int:
-    if not url or not url.strip():
-        raise ValueError("URL vide")
-    # accepte: https://freesound.org/s/123456/
-    m = re.search(r"/s/(\d+)", url)
-    if not m:
-        # fallback: dernier segment numérique
-        parts = [p for p in url.strip().rstrip("/").split("/") if p]
-        if not parts or not parts[-1].isdigit():
-            raise ValueError("Impossible d'extraire l'ID depuis l'URL")
-        return int(parts[-1])
-    return int(m.group(1))
-def get_fs_client() -> freesound.FreesoundClient:
-    if not API_TOKEN:
-        raise RuntimeError(
-            "Token FreeSound manquant. Mets-le dans FREESOUND_API_TOKEN (variable d'environnement)."
-        )
     c = freesound.FreesoundClient()
-    c.set_token(API_TOKEN, "token")
     return c
-def download_preview_with_retry(client: freesound.FreesoundClient, sound_id: int, tries: int = 4, sleep_base: float = 1.0):
-    """
-    Télécharge le preview FreeSound dans un fichier temporaire.
-    Retry simple (souvent utile quand FreeSound coupe / rate-limit).
-    """
-    last_err = None
-    for i in range(tries):
-        try:
-            snd = client.get_sound(sound_id)
-            # on force un mp3 (preview) -> pydub sait le lire (si ffmpeg dispo)
-            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
-            tmp.close()
-            snd.retrieve_preview(tmp.name)
-            return tmp.name, snd
-        except Exception as e:
-            last_err = e
-            time.sleep(sleep_base * (2 ** i))
-    raise RuntimeError(f"Échec téléchargement preview après {tries} essais: {last_err}")
-# =========================
-# Audio helpers
-# =========================
-def get_duration_seconds(filepath: str) -> float:
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".mp3":
         audio = AudioSegment.from_file(filepath)
@@ -181,7 +171,7 @@ def get_duration_seconds(filepath: str) -> float:
     with sf.SoundFile(filepath) as f:
         return len(f) / f.samplerate
-def to_wav_16k_mono(filepath: str) -> str:
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".wav":
         try:
@@ -190,7 +180,6 @@ def to_wav_16k_mono(filepath: str) -> str:
                     return filepath
         except Exception:
             pass
     audio = AudioSegment.from_file(filepath)
     audio = audio.set_channels(1).set_frame_rate(SR_TARGET)
     tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
@@ -198,35 +187,13 @@ def to_wav_16k_mono(filepath: str) -> str:
     audio.export(tmp.name, format="wav")
     return tmp.name
-def extract_opensmile_features(filepath: str) -> pd.DataFrame:
     wav_path = to_wav_16k_mono(filepath)
     feats = SMILE.process_file(wav_path)
     feats = feats.select_dtypes(include=[np.number]).reset_index(drop=True)
     return feats
-def expected_feature_names(model) -> list[str]:
-    if hasattr(model, "estimators_"):  # multioutput wrapper
-        base = model.estimators_[0]
-        if hasattr(base, "feature_names_in_"):
-            return list(base.feature_names_in_)
-        # fallback xgb
-        if hasattr(base, "get_booster"):
-            bn = base.get_booster().feature_names
-            if bn:
-                return list(bn)
-    if hasattr(model, "feature_names_in_"):
-        return list(model.feature_names_in_)
-    if hasattr(model, "get_booster"):
-        bn = model.get_booster().feature_names
-        if bn:
-            return list(bn)
-    raise RuntimeError("Impossible de récupérer la liste des features attendues par le modèle.")
-def predict_with_dmatrix(model, X_df: pd.DataFrame) -> np.ndarray:
-    """
-    Robust contre: 'data did not contain feature names'
-    Supporte MultiOutput (estimators_)
-    """
     if hasattr(model, "estimators_"):
         preds = []
         for est in model.estimators_:
@@ -241,170 +208,521 @@ def predict_with_dmatrix(model, X_df: pd.DataFrame) -> np.ndarray:
     p = booster.predict(dm)
     return np.asarray(p).reshape(1, -1)
-# =========================
-# Main pipeline (URL -> download -> features -> align -> predict)
-# =========================
-def predict_from_freesound_url(url: str):
-    # 1) parse URL
-    try:
-        sound_id = extract_freesound_id(url)
-    except Exception as e:
-        return (
-            html_error("URL invalide", f"{e}"),
-            pd.DataFrame(),
-            pd.DataFrame(),
-            pd.DataFrame()
-        )
-    # 2) API + download preview
     try:
-        client = get_fs_client()
-        audio_path, snd = download_preview_with_retry(client, sound_id)
     except Exception as e:
-        return (
-            html_error("Erreur FreeSound", f"Détail : <code>{e}</code>"),
-            pd.DataFrame(),
-            pd.DataFrame(),
-            pd.DataFrame()
-        )
-    # 3) duration + model select
-    try:
-        duration = float(getattr(snd, "duration", None) or 0.0)
-        if duration <= 0:
-            duration = get_duration_seconds(audio_path)
-    except Exception as e:
-        return (
-            html_error("Audio illisible", f"Impossible de lire la durée.<br>Détail : <code>{e}</code>"),
-            pd.DataFrame(),
-            pd.DataFrame(),
-            pd.DataFrame()
-        )
     if duration < MIN_EFFECT:
-        return (
-            html_error(
-                "Audio trop court",
-                f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
-                f"Plages acceptées :<br>"
-                f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
-                f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
-            ),
-            pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
-        )
     if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
-        return (
-            html_error(
-                "Audio hors plage",
-                f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
-                f"Plages acceptées :<br>"
-                f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
-                f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
-            ),
-            pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
-        )
     if duration <= MAX_EFFECT:
-        badge = "🔊 Effet sonore (URL FreeSound → openSMILE)"
-        model = MODEL_EFFECT
     else:
-        badge = "🎵 Musique (URL FreeSound → openSMILE)"
-        model = MODEL_MUSIC
-    # 4) extract openSMILE features (AVANT)
     try:
-        X_before = extract_opensmile_features(audio_path)
     except Exception as e:
-        return (
-            html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>"),
-            pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
-        )
-    # 5) align features (APRÈS)
     try:
-        expected = expected_feature_names(model)
-        before_cols = list(X_before.columns)
-        X_after = X_before.reindex(columns=expected, fill_value=0)
-        missing_added = [c for c in expected if c not in before_cols]
-        extras_dropped = [c for c in before_cols if c not in expected]
-        diff_df = pd.DataFrame({
-            "missing_added_(filled_0)": pd.Series(missing_added, dtype="object"),
-            "extras_dropped": pd.Series(extras_dropped, dtype="object"),
-        })
     except Exception as e:
-        return (
-            html_error("Alignement des features échoué", f"Détail : <code>{e}</code>"),
-            pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
-        )
-    # 6) predict
     try:
-        y = predict_with_dmatrix(model, X_after)
-        y = np.array(y)
-        avg_class = int(y[0, 0])
-        dl_class = int(y[0, 1])
     except Exception as e:
-        return (
-            html_error("Prédiction échouée", f"Détail : <code>{e}</code>"),
-            X_before, X_after, diff_df
-        )
-    rating_text = RATING_DISPLAY_AUDIO.get(avg_class, str(avg_class))
-    downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, str(dl_class))
-    conclusion = interpret_results(avg_class, dl_class)
     extra = f"""
-<div class="hint">ID FreeSound : <b>{sound_id}</b> · Preview téléchargé automatiquement</div>
 <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
-{conclusion}
 </div>
 """
-    return html_result(badge, duration, rating_text, downloads_text, extra_html=extra), X_before, X_after, diff_df
-# =========================
-# UI (fusion: 1 seule entrée URL)
-# =========================
-theme = gr.themes.Soft()
-with gr.Blocks(title="Prédiction popularité — URL FreeSound", css=CSS, theme=theme) as demo:
-    gr.HTML(
-        f"""
-<div id="header-title">Prédiction de popularité — URL FreeSound</div>
-<p id="header-sub">
-✅ Entrée = URL FreeSound → téléchargement preview → openSMILE → sélection auto du modèle → prédiction<br>
-<b>Durées acceptées :</b> 🔊 Effet sonore {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 Musique {MIN_MUSIC}–{MAX_MUSIC}s
-</p>
 """
     )
-    with gr.Row():
-        with gr.Column(scale=1):
-            url_in = gr.Textbox(
-                label="URL FreeSound",
-                placeholder="https://freesound.org/s/123456/",
-            )
-            btn = gr.Button("🚀 Prédire depuis l’URL", variant="primary")
-        with gr.Column(scale=1):
-            out_html = gr.HTML(label="Résultat")
-    gr.Markdown("## Features")
-    with gr.Row():
-        feat_before = gr.Dataframe(label="Features AVANT (openSMILE raw)", wrap=True, max_rows=20)
-        feat_after = gr.Dataframe(label="Features APRÈS (alignées modèle)", wrap=True, max_rows=20)
-    diff_out = gr.Dataframe(label="Diff (manquantes ajoutées / extras supprimées)", wrap=True, max_rows=50)
-    btn.click(
-        predict_from_freesound_url,
-        inputs=[url_in],
-        outputs=[out_html, feat_before, feat_after, diff_out],
-    )
 demo.launch()

 import os
 import tempfile
 import numpy as np
 import pandas as pd
 import gradio as gr
+import joblib
 import soundfile as sf
 from pydub import AudioSegment
+import opensmile
 import freesound
+import xgboost as xgb
+from sklearn.feature_extraction.text import HashingVectorizer
+# ============================================================
 # CONFIG
+# ============================================================
+# Accepted duration windows in seconds: sounds inside the "effect" window are
+# routed to the EffectSound models, sounds inside the "music" window to the
+# Music models; anything else is rejected by the prediction functions.
+MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
+MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
+# Frame rate passed to pydub when converting audio before feature extraction.
 SR_TARGET = 16000
+# HF Space Secret: FREESOUND_TOKEN
+# Read once at import time; an empty string means "no token configured".
+FREESOUND_TOKEN = os.getenv("FREESOUND_TOKEN", "").strip()
+# Directory containing this file; local artifacts are resolved against it.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+def p(*parts):
+    """Join the given path segments onto ``BASE_DIR`` and return the result."""
+    segments = (BASE_DIR,) + parts
+    return os.path.join(*segments)
+def load_local(path_rel: str):
+    """Load a joblib-serialized object stored relative to ``BASE_DIR``.
+
+    Args:
+        path_rel: Path of the artifact, relative to the application directory.
+
+    Returns:
+        Whatever object ``joblib.load`` deserializes from the file.
+
+    Raises:
+        FileNotFoundError: If the resolved file does not exist. The message
+            reports the relative path that was requested.
+    """
+    absolute = p(path_rel)
+    if os.path.exists(absolute):
+        # NOTE(review): joblib.load unpickles the file; only ship trusted
+        # artifacts alongside the app.
+        return joblib.load(absolute)
+    raise FileNotFoundError(f"Fichier introuvable: {path_rel}")
+# ============================================================
+# UI
+# ============================================================
 CSS = """
+.card { border: 1px solid #e5e7eb; background: #ffffff; padding: 16px; border-radius: 16px; }
+.card-error{ border-color: #fca5a5; background: #fff1f2; }
+.card-title{ font-weight: 950; margin-bottom: 8px; }
+.badges{ display:flex; gap:10px; flex-wrap:wrap; margin-bottom:12px; }
+.badge{ padding:6px 10px; border-radius:999px; font-weight:900; font-size: 13px; border: 1px solid #e5e7eb; }
+.badge-type{ background:#eef2ff; color:#3730a3;}
+.badge-time{ background:#ecfeff; color:#155e75;}
+.grid{ display:grid; grid-template-columns: 1fr; gap:10px; }
+.box{ border:1px solid #e5e7eb; border-radius:14px; padding:12px; background:#fafafa; }
+.box-title{ font-weight:900; margin-bottom:4px; }
+.box-value{ font-size:18px; font-weight:800; }
+.hint{ margin-top:10px; color:#6b7280; font-size:12px; }
+#header-title { font-size: 28px; font-weight: 950; margin-bottom: 6px; }
+#header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
+pre{ white-space:pre-wrap; }
 """
+def html_error(title, body_html):
+    """Return an HTML error card with ``title`` as heading and ``body_html`` as body.
+
+    Both values are interpolated into the markup as-is (no escaping is done
+    here), and the surrounding whitespace of the result is stripped.
+    """
     return f"""
+<div class="card card-error">
+  <div class="card-title">❌ {title}</div>
+  <div>{body_html}</div>
 </div>
+""".strip()
+def html_result(badge_text, duration, rating_text, downloads_text, extra_html=""):
+    """Return the HTML result card shown after a successful prediction.
+
+    Args:
+        badge_text: Text of the first badge (callers pass the model/source label).
+        duration: Numeric duration, rendered with two decimals followed by "s".
+        rating_text: Display text for the average-rating popularity box.
+        downloads_text: Display text for the downloads popularity box.
+        extra_html: Optional HTML fragment inserted verbatim below the boxes.
+
+    Returns:
+        The card markup with surrounding whitespace stripped. No value is
+        HTML-escaped by this function.
+    """
     return f"""
 <div class="card">
+  <div class="badges">
+    <span class="badge badge-type">{badge_text}</span>
+    <span class="badge badge-time">⏱️ {duration:.2f} s</span>
+  </div>
+  <div class="grid">
+    <div class="box">
+      <div class="box-title">📈 Popularité de la note moyenne</div>
+      <div class="box-value">{rating_text}</div>
+    </div>
+    <div class="box">
+      <div class="box-title">⬇️ Popularité des téléchargements</div>
+      <div class="box-value">{downloads_text}</div>
+    </div>
+  </div>
   {extra_html}
+  <div class="hint">Résultats en <b>niveaux</b> (faible / moyen / élevé), pas en valeurs exactes.</div>
 </div>
+""".strip()
 def interpret_results(avg_class: int, dl_class: int) -> str:
+    """Build an HTML sentence interpreting a (rating class, downloads class) pair.
+
+    ``avg_class == 0`` short-circuits to a "no evaluation possible" message.
+    Otherwise the pair is looked up in the chain below; any combination that
+    is not listed explicitly falls into the final "très faible" branch.
+    """
     if avg_class == 0:
+        return "ℹ️ <b>Interprétation</b> :<br>Aucune évaluation possible (rating manquant)."
     if avg_class == 3 and dl_class == 2:
+        potentiel, detail = "très fort", "contenu de haute qualité et très populaire."
     elif avg_class == 3 and dl_class == 1:
+        potentiel, detail = "fort", "contenu bien apprécié, en croissance."
     elif avg_class == 3 and dl_class == 0:
+        potentiel, detail = "prometteur", "bonne qualité mais faible visibilité."
     elif avg_class == 2 and dl_class == 2:
+        potentiel, detail = "modéré à fort", "populaire mais qualité perçue moyenne."
     elif avg_class == 2 and dl_class == 1:
+        potentiel, detail = "modéré", "profil standard, popularité stable."
     elif avg_class == 2 and dl_class == 0:
+        potentiel, detail = "limité", "engagement faible, diffusion limitée."
     elif avg_class == 1 and dl_class == 2:
+        potentiel, detail = "contradictoire", "très téléchargé mais peu apprécié."
     elif avg_class == 1 and dl_class == 1:
+        potentiel, detail = "faible", "peu attractif."
     else:
+        potentiel, detail = "très faible", "faible intérêt global."
     return f"<b>Interprétation</b> :<br>Potentiel estimé : <b>{potentiel}</b> — {detail}"
+def avg_label_to_class(avg_label: str) -> int:
+    """Map a textual rating label to an integer class.
+
+    The label is stringified, stripped and lower-cased, then tested for
+    keyword substrings. Rule groups are checked in order and the first group
+    with a matching keyword wins.
+
+    Returns:
+        0 for ``None``, "missing"-like or unrecognized labels, 3 for high,
+        2 for medium, 1 for low.
+    """
+    if avg_label is None:
+        return 0
+    normalized = str(avg_label).strip().lower()
+    # Order matters: the "missing" keywords take precedence over the levels.
+    rules = (
+        (("miss", "missing", "none", "no"), 0),
+        (("high", "élev", "eleve"), 3),
+        (("medium", "moy"), 2),
+        (("low", "faibl"), 1),
+    )
+    for keywords, klass in rules:
+        if any(keyword in normalized for keyword in keywords):
+            return klass
+    return 0
+def safe_float(v):
+    """Convert ``v`` to ``float``, returning ``0.0`` when the conversion fails."""
+    try:
+        converted = float(v)
+    except Exception:
+        # Deliberate best-effort: any value that cannot be converted counts as 0.
+        converted = 0.0
+    return converted
+def parse_sound_id(url: str):
+    """Extract the numeric sound ID from the last path segment of a URL.
+
+    Surrounding whitespace, a trailing slash, and any query string or
+    fragment are ignored, so a pasted URL such as
+    ``" https://freesound.org/s/123456/?x=1 "`` resolves to ``123456``.
+
+    Args:
+        url: URL (or bare ID string) whose last path segment is the ID.
+
+    Returns:
+        The ID as an ``int``.
+
+    Raises:
+        ValueError: If the last path segment is not an integer.
+    """
+    # Cut the fragment and query first so they cannot end up as the "last segment".
+    path = url.strip().split("#", 1)[0].split("?", 1)[0]
+    return int(path.rstrip("/").rsplit("/", 1)[-1])
+# ============================================================
+# FREESOUND CLIENT
+# ============================================================
+def get_fs_client():
+    """Create a ``FreesoundClient`` authenticated with ``FREESOUND_TOKEN``.
+
+    Raises:
+        RuntimeError: If ``FREESOUND_TOKEN`` is empty.
+    """
+    if not FREESOUND_TOKEN:
+        raise RuntimeError("Token FreeSound manquant. Ajoute le secret FREESOUND_TOKEN dans le Space.")
     c = freesound.FreesoundClient()
+    c.set_token(FREESOUND_TOKEN, "token")
     return c
+# ============================================================
+# PARTIE A — Upload audio → openSMILE → modèles
+# (depuis app (2).py)
+# ============================================================
+# Models used by the upload path; both are loaded once at import time, so a
+# missing file makes the module fail to import (load_local raises).
+MODEL_EFFECT_A = load_local("xgb_model_EffectSound.pkl")
+MODEL_MUSIC_A  = load_local("xgb_model_Music.pkl")
+# Display text for the predicted rating class (0-3) and downloads class (0-2).
+RATING_DISPLAY_AUDIO = {0: "❌ Informations manquantes", 1: "⭐ Faible", 2: "⭐⭐ Moyen", 3: "⭐⭐⭐ Élevé"}
+DOWNLOADS_DISPLAY_AUDIO = {0: "⭐ Faible", 1: "⭐⭐ Moyen", 2: "⭐⭐⭐ Élevé"}
+# Shared openSMILE extractor: eGeMAPSv02 feature set at the Functionals level.
+SMILE = opensmile.Smile(
+    feature_set=opensmile.FeatureSet.eGeMAPSv02,
+    feature_level=opensmile.FeatureLevel.Functionals,
+)
+def get_duration_seconds(filepath):
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".mp3":
         audio = AudioSegment.from_file(filepath)
     with sf.SoundFile(filepath) as f:
         return len(f) / f.samplerate
+def to_wav_16k_mono(filepath):
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".wav":
         try:
                     return filepath
         except Exception:
             pass
     audio = AudioSegment.from_file(filepath)
     audio = audio.set_channels(1).set_frame_rate(SR_TARGET)
     tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     audio.export(tmp.name, format="wav")
     return tmp.name
+def extract_opensmile_features(filepath):
     wav_path = to_wav_16k_mono(filepath)
     feats = SMILE.process_file(wav_path)
     feats = feats.select_dtypes(include=[np.number]).reset_index(drop=True)
     return feats
+def predict_upload_with_dmatrix(model, X_df: pd.DataFrame):
     if hasattr(model, "estimators_"):
         preds = []
         for est in model.estimators_:
     p = booster.predict(dm)
     return np.asarray(p).reshape(1, -1)
+def predict_opensmile_upload(audio_file):
+    """Predict popularity classes for an uploaded audio file and return HTML.
+
+    Steps: read the duration, reject durations outside the effect/music
+    windows, pick the matching model, extract openSMILE features, align the
+    columns to the model's expected features, predict, and render the card.
+    Every failure is reported as an ``html_error`` card instead of raising.
+
+    Args:
+        audio_file: Path of the uploaded file, or ``None`` when nothing was
+            uploaded.
+
+    Returns:
+        An HTML string (result card or error card).
+    """
+    if audio_file is None:
+        return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")
     try:
+        duration = get_duration_seconds(audio_file)
     except Exception as e:
+        return html_error("Audio illisible", f"Impossible de lire l'audio.<br>Détail : <code>{e}</code>")
+    # NOTE(review): the two messages below hard-code the accepted ranges
+    # rather than formatting MIN_/MAX_ constants — keep them in sync.
     if duration < MIN_EFFECT:
+        return html_error("Audio trop court",
+                          f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
     if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
+        return html_error("Audio hors plage",
+                          f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
     if duration <= MAX_EFFECT:
+        badge = "🔊 OpenSMILE (upload) — EffectSound"
+        model = MODEL_EFFECT_A
     else:
+        badge = "🎵 OpenSMILE (upload) — Music"
+        model = MODEL_MUSIC_A
     try:
+        X = extract_opensmile_features(audio_file)
     except Exception as e:
+        return html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>")
+    # Align features
+    # Reorder/complete the columns to what the model expects; columns the
+    # extractor did not produce are filled with 0.
     try:
+        expected = model.estimators_[0].feature_names_in_ if hasattr(model, "estimators_") else model.feature_names_in_
+        X = X.reindex(columns=list(expected), fill_value=0)
     except Exception as e:
+        return html_error("Alignement features échoué", f"Détail : <code>{e}</code>")
     try:
+        y = predict_upload_with_dmatrix(model, X)
     except Exception as e:
+        return html_error("Prédiction échouée", f"Détail : <code>{e}</code>")
+    # Row 0 holds the prediction: column 0 is the rating class, column 1 the
+    # downloads class.
+    y = np.array(y)
+    avg_class = int(y[0, 0])
+    dl_class = int(y[0, 1])
+    rating_text = RATING_DISPLAY_AUDIO.get(avg_class, "Inconnu")
+    downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, "Inconnu")
     extra = f"""
 <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
+{interpret_results(avg_class, dl_class)}
 </div>
 """
+    return html_result(badge, duration, rating_text, downloads_text, extra_html=extra)
+# ============================================================
+# PARTIE B — FreeSound URL → “Acoustic features API fields” → modèles
+# (depuis app (2).py / PARTIE B)
+# ============================================================
+# Part B artifacts, loaded once at import time. For each sound type there is a
+# downloads model and an average-rating model, each with its own feature-name
+# list, plus a label encoder used to decode the rating prediction.
+xgb_music_num_B       = load_local("xgb_num_downloads_music_model.pkl")
+xgb_music_feat_num_B  = load_local("xgb_num_downloads_music_features.pkl")
+xgb_music_avg_B       = load_local("xgb_avg_rating_music_model.pkl")
+xgb_music_feat_avg_B  = load_local("xgb_avg_rating_music_features.pkl")
+le_music_avg_B        = load_local("xgb_avg_rating_music_label_encoder.pkl")
+xgb_effect_num_B      = load_local("xgb_num_downloads_effectsound_model.pkl")
+xgb_effect_feat_num_B = load_local("xgb_num_downloads_effectsound_features.pkl")
+xgb_effect_avg_B      = load_local("xgb_avg_rating_effectsound_model.pkl")
+xgb_effect_feat_avg_B = load_local("xgb_avg_rating_effectsound_features.pkl")
+le_effect_avg_B       = load_local("xgb_avg_rating_effectsound_label_encoder.pkl")
+# Display text for the predicted downloads class.
+NUM_DOWNLOADS_MAP_B = {0: "Faible", 1: "Moyen", 2: "Élevé"}
+def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
+    """Run one model on a single sound described by a mapping of features.
+
+    Args:
+        model: Object exposing ``get_booster()``, or a booster itself.
+        features_dict: Mapping supporting ``.get(name, default)``.
+        feat_list: Ordered feature names; defines the column order of the row.
+        label_encoder: Optional encoder; when given, the integer prediction is
+            decoded with ``inverse_transform``.
+
+    Returns:
+        The decoded label when ``label_encoder`` is given, otherwise the
+        prediction truncated to ``int``.
+    """
+    # Missing, None, list and dict values all count as 0; the rest goes
+    # through safe_float.
+    values = [
+        safe_float(0 if (raw is None or isinstance(raw, (list, dict))) else raw)
+        for raw in (features_dict.get(name, 0) for name in feat_list)
+    ]
+    frame = pd.DataFrame([values], columns=feat_list)
+    dmatrix = xgb.DMatrix(frame.values, feature_names=feat_list)
+    booster = model.get_booster() if hasattr(model, "get_booster") else model
+    pred_int = int(booster.predict(dmatrix)[0])
+    if label_encoder is None:
+        return pred_int
+    return label_encoder.inverse_transform([pred_int])[0]
+def predict_freesound_acoustic_features(url: str):
+    """Predict popularity classes for a FreeSound URL from API-provided fields.
+
+    The sound ID is parsed from ``url``, the sound is looked up through
+    ``fs_client.search`` with an ``id:`` filter, and the returned fields are
+    fed to the EffectSound or Music models depending on the duration window.
+    Every failure is reported as an ``html_error`` card instead of raising.
+
+    Args:
+        url: FreeSound sound URL.
+
+    Returns:
+        An HTML string (result card or error card).
+    """
+    if not url or not url.strip():
+        return html_error("URL vide", "Colle une URL du type <code>https://freesound.org/s/123456/</code>")
+    try:
+        sound_id = parse_sound_id(url)
+    except Exception:
+        return html_error("URL invalide", "Impossible d'extraire l'ID depuis l'URL.")
+    try:
+        fs_client = get_fs_client()
+    except Exception as e:
+        return html_error("Token FreeSound", str(e))
+    # Request the union of the features needed by all four models, because
+    # the sound type is only known once the duration comes back.
+    all_features = list(set(
+        xgb_music_feat_num_B + xgb_music_feat_avg_B + xgb_effect_feat_num_B + xgb_effect_feat_avg_B
+    ))
+    fields = "duration," + ",".join(all_features)
+    try:
+        results = fs_client.search(query="", filter=f"id:{sound_id}", fields=fields)
+    except Exception as e:
+        return html_error("Erreur API FreeSound", f"Détail : <code>{e}</code>")
+    if len(results.results) == 0:
+        return html_error("Son introuvable", "Aucun résultat pour cet ID.")
+    sound = results.results[0]
+    duration = safe_float(sound.get("duration", 0))
+    # Effect window.
+    if MIN_EFFECT <= duration <= MAX_EFFECT:
+        badge = "🔊 FreeSound (API features acoustiques) — EffectSound"
+        dl_class = int(predict_with_model_fs(xgb_effect_num_B, sound, xgb_effect_feat_num_B))
+        avg_text = str(predict_with_model_fs(xgb_effect_avg_B, sound, xgb_effect_feat_avg_B, le_effect_avg_B))
+        dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
+        avg_class = avg_label_to_class(avg_text)
+        extra = f"""
+<div class="hint">ID FreeSound : <b>{sound_id}</b></div>
+<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
+{interpret_results(avg_class, dl_class)}
+</div>
 """
+        return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
+    # Music window (same steps with the music models).
+    if MIN_MUSIC <= duration <= MAX_MUSIC:
+        badge = "🎵 FreeSound (API features acoustiques) — Music"
+        dl_class = int(predict_with_model_fs(xgb_music_num_B, sound, xgb_music_feat_num_B))
+        avg_text = str(predict_with_model_fs(xgb_music_avg_B, sound, xgb_music_feat_avg_B, le_music_avg_B))
+        dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
+        avg_class = avg_label_to_class(avg_text)
+        extra = f"""
+<div class="hint">ID FreeSound : <b>{sound_id}</b></div>
+<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
+{interpret_results(avg_class, dl_class)}
+</div>
+"""
+        return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
+    # NOTE(review): the message hard-codes the accepted ranges rather than
+    # formatting the MIN_/MAX_ constants — keep them in sync.
+    return html_error("Durée non supportée",
+                      f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
+# ============================================================
+# PARTIE C — FreeSound URL → Metadata preprocessing lourd → modèles
+# (depuis app (3).py)
+# ============================================================
+# ---- preprocessing objects (local files) ----
+# All artifacts below are loaded once at import time through load_local.
+# Music
+scaler_samplerate_music   = load_local("music/scaler_music_samplerate.joblib")
+scaler_age_days_music     = load_local("music/scaler_music_age_days_log.joblib")
+username_freq_music       = load_local("music/username_freq_dict_music.joblib")
+est_num_downloads_music   = load_local("music/est_num_downloads_music.joblib")
+avg_rating_transformer_music = load_local("music/avg_rating_transformer_music.joblib")
+music_subcategory_cols    = load_local("music/music_subcategory_cols.joblib")
+music_onehot_cols         = load_local("music/music_onehot_cols.joblib")
+music_onehot_tags         = load_local("music/music_onehot_tags.joblib")
+# EffectSound
+scaler_samplerate_effect  = load_local("effectSound/scaler_effectSamplerate.joblib")
+scaler_age_days_effect    = load_local("effectSound/scaler_effectSound_age_days_log.joblib")
+username_freq_effect      = load_local("effectSound/username_freq_dict_effectSound.joblib")
+est_num_downloads_effect  = load_local("effectSound/est_num_downloads_effectSound.joblib")
+avg_rating_transformer_effect = load_local("effectSound/avg_rating_transformer_effectSound.joblib")
+effect_subcategory_cols   = load_local("effectSound/effectSound_subcategory_cols.joblib")
+effect_onehot_cols        = load_local("effectSound/effectSound_onehot_cols.joblib")
+effect_onehot_tags        = load_local("effectSound/effect_onehot_tags.joblib")
+# ---- metadata models (local files) ----
+music_model_num_downloads_C = load_local("music_model_num_downloads.joblib")
+music_model_avg_rating_C    = load_local("music_xgb_avg_rating.joblib")
+music_avg_rating_le_C       = load_local("music_xgb_avg_rating_label_encoder.joblib")
+music_model_features_C      = load_local("music_model_features_list.joblib")
+effect_model_num_downloads_C = load_local("effectSound_model_num_downloads.joblib")
+effect_model_avg_rating_C    = load_local("effectSound_xgb_avg_rating.joblib")
+effect_avg_rating_le_C       = load_local("effectSound_xgb_avg_rating_label_encoder.joblib")
+effect_model_features_C      = load_local("effect_model_features_list.joblib")
+# De-duplicate the feature lists while keeping first-occurrence order
+# (same as the original training script).
+music_model_features_C = list(dict.fromkeys(music_model_features_C))
+effect_model_features_C = list(dict.fromkeys(effect_model_features_C))
+# ---- local GloVe vectors (optional) ----
+# To enable them, ship a local vectors file and point the GLOVE_PATH
+# environment variable at it (resolved relative to the app directory).
+# Example: GLOVE_PATH="models/glove.kv"
+GLOVE_PATH = os.getenv("GLOVE_PATH", "").strip()
+# Stays None unless try_load_glove() manages to load the vectors.
+glove_model = None
+def try_load_glove():
+    """Populate the global ``glove_model`` from ``GLOVE_PATH`` when possible.
+
+    ``glove_model`` ends up as ``None`` when ``GLOVE_PATH`` is empty, when
+    the resolved file does not exist, or when importing gensim or loading
+    the vectors raises. This function never raises for those cases.
+    """
+    global glove_model
+    glove_model = None
+    if not GLOVE_PATH:
+        return
+    vectors_path = p(GLOVE_PATH)
+    if not os.path.exists(vectors_path):
+        return
+    try:
+        # Imported lazily so the app still starts when gensim is not installed.
+        from gensim.models import KeyedVectors
+        glove_model = KeyedVectors.load(vectors_path, mmap="r")
+    except Exception:
+        # Best-effort: the embeddings are optional, so fall back to None.
+        glove_model = None
+def description_to_vec(text, model, dim=100):
+    """Average the vectors of the words of ``text`` that ``model`` knows.
+
+    Args:
+        text: Free text; it is lower-cased and split on whitespace.
+        model: Object supporting ``word in model`` and ``model[word]``, or
+            ``None``.
+        dim: Length of the zero vector returned as fallback.
+
+    Returns:
+        The element-wise mean of the matched vectors, or ``np.zeros(dim)``
+        when ``model`` is ``None``, ``text`` is empty, or no word matches.
+    """
+    fallback = np.zeros(dim, dtype=float)
+    if model is None or not text:
+        return fallback
+    known = [model[token] for token in text.lower().split() if token in model]
+    return np.mean(known, axis=0) if known else fallback
+def preprocess_name(df, vec_dim=8):
+    """Return a copy of ``df`` with name-derived feature columns added.
+
+    Adds ``name_len`` (string length of ``name_clean``) and ``vec_dim``
+    columns ``name_vec_0`` … ``name_vec_{vec_dim-1}`` produced by a
+    ``HashingVectorizer`` over ``name_clean``. The input frame is not modified.
+
+    Args:
+        df: DataFrame containing a ``name_clean`` string column.
+        vec_dim: Number of hashed features to generate.
+    """
+    df = df.copy()
+    df["name_len"] = df["name_clean"].str.len()
+    vectorizer = HashingVectorizer(n_features=vec_dim, alternate_sign=False, norm=None)
+    name_vec_sparse = vectorizer.transform(df["name_clean"])
+    # Keep the original index so the concat below aligns row by row.
+    name_vec_df = pd.DataFrame(
+        name_vec_sparse.toarray(),
+        columns=[f"name_vec_{i}" for i in range(vec_dim)],
+        index=df.index
     )
+    df = pd.concat([df, name_vec_df], axis=1)
+    return df
+def fetch_sound_metadata(fs_client, sound_url):
+    """Fetch one sound through ``fs_client`` and return its metadata as a DataFrame.
+
+    Args:
+        fs_client: Client exposing ``get_sound(sound_id)``.
+        sound_url: URL from which ``parse_sound_id`` extracts the sound ID.
+
+    Returns:
+        A one-row ``pd.DataFrame``. ``name`` is read directly from the sound
+        object; every other field falls back to a default when the attribute
+        is absent. Tags are joined into one comma-separated string.
+    """
+    sound_id = parse_sound_id(sound_url)
+    sound = fs_client.get_sound(sound_id)
+    def attr(field, default):
+        return getattr(sound, field, default)
+    tag_list = attr("tags", None)
+    # Key order is preserved: it defines the column order of the frame.
+    record = {
+        "id": sound_id,
+        "name": sound.name,
+        "num_ratings": attr("num_ratings", 0),
+        "tags": ",".join(tag_list) if tag_list else "",
+        "username": attr("username", ""),
+        "description": attr("description", "") or "",
+        "created": attr("created", ""),
+        "license": attr("license", ""),
+        "num_downloads": attr("num_downloads", 0),
+        "channels": attr("channels", 0),
+        "filesize": attr("filesize", 0),
+        "num_comments": attr("num_comments", 0),
+        "category_is_user_provided": attr("category_is_user_provided", 0),
+        "duration": attr("duration", 0),
+        "avg_rating": attr("avg_rating", 0),
+        "category": attr("category", "Unknown"),
+        "subcategory": attr("subcategory", "Other"),
+        "type": attr("type", ""),
+        "samplerate": attr("samplerate", 0),
+    }
+    return pd.DataFrame([record])
def _strip_prefix(name, prefix):
    """Return *name* without a leading *prefix* (unchanged if absent)."""
    return name[len(prefix):] if name.startswith(prefix) else name


def preprocess_sound_metadata(df):
    """Turn one row of raw FreeSound metadata into model-ready features.

    The duration of the first row selects either the effect-sound or the
    music set of fitted artifacts (scalers, username frequency map,
    transformers, one-hot column lists). Those artifacts are module-level
    objects defined elsewhere in this file.

    Args:
        df: single-row DataFrame of raw metadata (see
            ``fetch_sound_metadata`` for the expected columns).

    Returns:
        ``(features_df, dataset_type)`` where ``dataset_type`` is
        ``"effectSound"`` or ``"music"``, or ``(None, message)`` when the
        duration falls outside both accepted ranges.
    """
    df = df.copy()
    dur = float(df["duration"].iloc[0])
    if MIN_EFFECT <= dur <= MAX_EFFECT:
        dataset_type = "effectSound"
        scaler_samplerate = scaler_samplerate_effect
        scaler_age = scaler_age_days_effect
        username_freq = username_freq_effect
        est_num_downloads = est_num_downloads_effect
        avg_rating_transformer = avg_rating_transformer_effect
        subcat_cols = effect_subcategory_cols
        onehot_cols = effect_onehot_cols
        onehot_tags = effect_onehot_tags
    elif MIN_MUSIC <= dur <= MAX_MUSIC:
        dataset_type = "music"
        scaler_samplerate = scaler_samplerate_music
        scaler_age = scaler_age_days_music
        username_freq = username_freq_music
        est_num_downloads = est_num_downloads_music
        avg_rating_transformer = avg_rating_transformer_music
        subcat_cols = music_subcategory_cols
        onehot_cols = music_onehot_cols
        onehot_tags = music_onehot_tags
    else:
        return None, f"Durée hors plage ({dur:.2f}s)."

    # Boolean flag as integer.
    df["category_is_user_provided"] = df["category_is_user_provided"].astype(int)

    # Username frequency; usernames absent from the map get 0.
    df["username_freq"] = df["username"].map(username_freq).fillna(0)

    # log1p on the count/size-like numeric columns.
    for col in ["num_ratings", "num_comments", "filesize", "duration"]:
        df[col] = np.log1p(df[col])

    # Scaled sample rate.
    df["samplerate"] = scaler_samplerate.transform(df[["samplerate"]])

    # Age in days (timezone dropped first), then log1p and scaling.
    df["created"] = pd.to_datetime(df["created"], errors="coerce").dt.tz_localize(None)
    df["age_days"] = (pd.Timestamp.now() - df["created"]).dt.days
    df["age_days_log"] = np.log1p(df["age_days"])
    df["age_days_log_scaled"] = scaler_age.transform(df[["age_days_log"]])
    df = df.drop(columns=["created", "age_days", "age_days_log"], errors="ignore")

    # Download-count class.
    df["num_downloads_class"] = est_num_downloads.transform(df[["num_downloads"]])

    # Transformed average rating.
    df["avg_rating"] = avg_rating_transformer.transform(df["avg_rating"].to_numpy())

    # Subcategory one-hot. Only the leading "subcategory_" marker is removed,
    # so a value that itself contains that text still compares correctly.
    subcat_val = df["subcategory"].iloc[0]
    for col in subcat_cols:
        df[col] = int(subcat_val == _strip_prefix(col, "subcategory_"))
    df.drop(columns=["subcategory"], inplace=True, errors="ignore")

    # Fixed one-hot columns: create the missing ones as 0, then switch on
    # those matching this sound's license / category / type.
    for col in onehot_cols:
        if col not in df.columns:
            df[col] = 0
    license_val = df.loc[0, "license"]
    category_val = df.loc[0, "category"]
    type_val = df.loc[0, "type"]
    for col_name in [f"license_{license_val}", f"category_{category_val}", f"type_{type_val}"]:
        if col_name in df.columns:
            df[col_name] = 1

    # Tags one-hot.
    for col in ["name", "tags", "description"]:
        if col not in df.columns:
            df[col] = ""
    for col in onehot_tags:
        if col not in df.columns:
            df[col] = 0
    raw_tags = df["tags"].iloc[0]
    # A set gives O(1) membership tests across all tag columns.
    tags_present = set(raw_tags.lower().split(",")) if raw_tags else set()
    for col in onehot_tags:
        # Strip only the leading "tag_" marker: tags that contain "tag_"
        # themselves must keep it to be matched.
        if _strip_prefix(col, "tag_").lower() in tags_present:
            df[col] = 1
    df.drop(columns=["tags"], inplace=True, errors="ignore")

    # Name hashing (the text after the last "." is dropped).
    df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
    df = preprocess_name(df, vec_dim=8)
    df.drop(columns=["name", "name_clean"], inplace=True, errors="ignore")

    # Description vectors (local GloVe if available, zeros otherwise).
    desc_vec = description_to_vec(df["description"].iloc[0], glove_model, dim=100)
    for i in range(100):
        df[f"description_glove_{i}"] = float(desc_vec[i])
    df.drop(columns=["description"], inplace=True, errors="ignore")

    # Drop raw columns the models do not consume.
    df.drop(columns=["license","category","type","subcategory","id","num_downloads","file_path","username"],
            inplace=True, errors="ignore")
    return df, dataset_type
def predict_with_model_df(model, df_input, model_features, le=None):
    """Predict one class for the first row of *df_input*.

    Args:
        model: fitted estimator exposing ``get_booster()``.
        df_input: feature DataFrame; only the first row's prediction is used.
        model_features: feature names used to align *df_input* when the
            booster itself stores no feature names.
        le: optional label encoder; when given, the predicted class index is
            mapped back through ``le.inverse_transform``.

    Returns:
        The decoded label when *le* is given (or a ``"Classe inconnue (n)"``
        string if decoding fails), otherwise the predicted class as ``int``.
    """
    booster = model.get_booster()
    booster_feats = booster.feature_names

    # Prefer the names stored in the booster; otherwise align on the
    # caller-supplied list so column order still matches training.
    if booster_feats:
        align_cols = booster_feats
    elif model_features is not None:
        align_cols = list(model_features)
    else:
        align_cols = None

    X_aligned = df_input.reindex(columns=align_cols, fill_value=0.0).astype(float)
    dmatrix = xgb.DMatrix(X_aligned.values, feature_names=booster_feats)
    preds = booster.predict(dmatrix)
    pred_val = preds[0]

    # 2-D output: one score per class, take the arg-max.
    # 1-D output: a single value, rounded to the nearest class index.
    if len(preds.shape) > 1 and preds.shape[1] > 1:
        pred_int = int(np.argmax(pred_val))
    else:
        pred_int = int(round(float(pred_val)))

    if le is not None:
        try:
            return le.inverse_transform([pred_int])[0]
        except Exception:
            # Deliberate best-effort: surface the raw index instead of failing.
            return f"Classe inconnue ({pred_int})"
    return pred_int
def predict_freesound_metadata(url: str, show_debug: bool):
    """Run the metadata pipeline for a FreeSound URL and render the result.

    Args:
        url: FreeSound sound URL.
        show_debug: when true, append the raw metadata and a preview of the
            processed features to the output.

    Returns:
        An HTML fragment: the prediction card produced by ``html_result``,
        or an error card produced by ``html_error``.
    """
    # Local import: used only to escape text that comes from the remote API
    # before it is embedded in HTML.
    import html as _html

    if not url or not url.strip():
        return html_error("URL vide", "Colle une URL du type <code>https://freesound.org/s/123456/</code>")
    try:
        sound_id = parse_sound_id(url)
    except Exception:
        return html_error("URL invalide", "Impossible d'extraire l'ID depuis l'URL.")
    try:
        fs_client = get_fs_client()
    except Exception as e:
        return html_error("Token FreeSound", str(e))
    try:
        df_raw = fetch_sound_metadata(fs_client, url)
    except Exception as e:
        return html_error("Erreur API FreeSound", f"Détail : <code>{_html.escape(str(e))}</code>")

    dur = float(df_raw["duration"].iloc[0])
    # Build the hint from the same constants that drive the checks, so the
    # message cannot drift from the ranges actually enforced.
    accepted = f"Accepté: {MIN_EFFECT}–{MAX_EFFECT}s ou {MIN_MUSIC}–{MAX_MUSIC}s"
    if dur < MIN_EFFECT:
        return html_error("Son trop court", f"Durée {dur:.2f}s. {accepted}")
    if (MAX_EFFECT < dur < MIN_MUSIC) or dur > MAX_MUSIC:
        return html_error("Son hors plage", f"Durée {dur:.2f}s. {accepted}")

    df_processed, dataset_type = preprocess_sound_metadata(df_raw)
    if df_processed is None:
        return html_error("Preprocessing metadata", "Impossible de prétraiter (durée hors plage).")

    # Pick models / feature list according to the detected dataset type.
    if dataset_type == "effectSound":
        badge = "🔊 FreeSound (metadata) — EffectSound"
        model_nd = effect_model_num_downloads_C
        model_ar = effect_model_avg_rating_C
        model_features = effect_model_features_C
        le = effect_avg_rating_le_C
    else:
        badge = "🎵 FreeSound (metadata) — Music"
        model_nd = music_model_num_downloads_C
        model_ar = music_model_avg_rating_C
        model_features = music_model_features_C
        le = music_avg_rating_le_C

    # IMPORTANT: avg_rating and num_downloads_class are the prediction
    # targets, so they are dropped before the features reach the models.
    cols_to_remove = ["avg_rating", "num_downloads_class"]
    df_for_model = df_processed.drop(columns=[c for c in cols_to_remove if c in df_processed.columns], errors="ignore")
    # Force exactly the model's columns (missing ones are filled with 0.0).
    df_for_model = df_for_model.reindex(columns=model_features, fill_value=0.0).astype(float)

    pred_num_downloads_val = predict_with_model_df(model_nd, df_for_model, model_features, le=None)
    num_map = {0: "Low", 1: "Medium", 2: "High"}
    pred_num_downloads = num_map.get(pred_num_downloads_val, str(pred_num_downloads_val))
    pred_avg_rating = predict_with_model_df(model_ar, df_for_model, model_features, le=le)
    avg_class = avg_label_to_class(pred_avg_rating)
    # Unknown download labels fall back to the middle class for interpretation.
    dl_class_for_interp = {"Low":0,"Medium":1,"High":2}.get(pred_num_downloads, 1)

    debug_html = ""
    if show_debug:
        # Raw metadata contains user-written text (name, description, tags):
        # escape it so it is displayed literally, never interpreted as markup.
        raw_txt = _html.escape("\n".join([f"{c}: {df_raw.loc[0,c]}" for c in df_raw.columns]))
        # Preview limited to the first 120 processed columns.
        proc_txt = _html.escape("\n".join([f"{c}: {df_processed.loc[0,c]}" for c in df_processed.columns[:120]]))
        glove_note = "OK" if glove_model is not None else "ABSENT (vecteurs à 0)"
        debug_html = f"""
<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
  <div class="hint"><b>Debug</b> — GloVe: <b>{glove_note}</b></div>
  <details><summary>Voir métadonnées brutes</summary><pre>{raw_txt}</pre></details>
  <details><summary>Voir features après preprocessing (aperçu)</summary><pre>{proc_txt}</pre></details>
</div>
"""
    extra = f"""
<div class="hint">ID FreeSound : <b>{_html.escape(str(sound_id))}</b></div>
<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
{interpret_results(avg_class, dl_class_for_interp)}
</div>
{debug_html}
"""
    return html_result(badge, dur, str(pred_avg_rating), str(pred_num_downloads), extra_html=extra)
# ============================================================
# GRADIO APP (3 tabs)
# ============================================================
# One Blocks app with three tabs, one per prediction pipeline. Each tab
# rebinds the names `btn` / `out` (and `url_in`); this works because the
# click handler is wired inside the tab, before the next rebinding.
with gr.Blocks(title="Popularité FreeSound — 3 pipelines", css=CSS, theme=gr.themes.Soft()) as demo:
    # Header: the accepted duration ranges are rendered from the constants.
    gr.HTML(f"""
<div id="header-title">Popularité FreeSound — 3 pipelines</div>
<p id="header-sub">
<b>A)</b> Upload audio → <b>OpenSMILE</b><br>
<b>B)</b> URL FreeSound → <b>Features acoustiques via API fields</b><br>
<b>C)</b> URL FreeSound → <b>Metadata + preprocessing</b><br><br>
<b>Durées acceptées :</b> 🔊 {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 {MIN_MUSIC}–{MAX_MUSIC}s
</p>
""")
    with gr.Tabs():
        # Tab A: uploaded audio file -> predict_opensmile_upload.
        with gr.Tab("A) Upload → OpenSMILE"):
            with gr.Row():
                with gr.Column():
                    audio_in = gr.Audio(type="filepath", label="Fichier audio")
                    btn = gr.Button("🚀 Prédire (OpenSMILE)", variant="primary")
                with gr.Column():
                    out = gr.HTML()
            btn.click(predict_opensmile_upload, inputs=audio_in, outputs=out)
        # Tab B: FreeSound URL -> predict_freesound_acoustic_features.
        with gr.Tab("B) URL → Features acoustiques (API)"):
            with gr.Row():
                with gr.Column():
                    url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
                    btn = gr.Button("🚀 Prédire (Features API)", variant="primary")
                with gr.Column():
                    out = gr.HTML()
            btn.click(predict_freesound_acoustic_features, inputs=url_in, outputs=out)
        # Tab C: FreeSound URL (+ debug checkbox) -> predict_freesound_metadata.
        with gr.Tab("C) URL → Metadata (prétraitement)"):
            with gr.Row():
                with gr.Column():
                    url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
                    show_debug = gr.Checkbox(label="Afficher debug (brut + aperçu features)", value=False)
                    btn = gr.Button("🚀 Prédire (Metadata)", variant="primary")
                with gr.Column():
                    out = gr.HTML()
            btn.click(predict_freesound_metadata, inputs=[url_in, show_debug], outputs=out)
# Launched unconditionally when the module is executed (no __main__ guard).
demo.launch()