EffectSound_Music_Popularity

Sleeping

App Files Files Community

IKRAMELHADI committed on Feb 9

Commit

97483f5

1 Parent(s): 49de9df

testtest5

Browse files

Files changed (12) hide show

app.py +370 -273
xgb_avg_rating_effectsound_label_encoder.pkl +3 -0
xgb_avg_rating_effectsound_model.pkl +3 -0
xgb_avg_rating_music_features.pkl +3 -0
xgb_avg_rating_music_label_encoder.pkl +3 -0
xgb_avg_rating_music_model.pkl +3 -0
xgb_model_EffectSound.pkl +3 -0
xgb_model_Music.pkl +3 -0
xgb_num_downloads_effectsound_features.pkl +3 -0
xgb_num_downloads_effectsound_model.pkl +3 -0
xgb_num_downloads_music_features.pkl +3 -0
xgb_num_downloads_music_model.pkl +3 -0

app.py CHANGED Viewed

@@ -1,313 +1,410 @@
-# freesound_preprocess_ui.py
-# -*- coding: utf-8 -*-
 import os
 import re
 import time
-import urllib.parse
-from typing import Any, Dict, Tuple, Optional, List
 import numpy as np
 import pandas as pd
-import requests
 import gradio as gr
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.preprocessing import StandardScaler
-# ----------------------------
-# Robust network helpers
-# ----------------------------
-DEFAULT_TIMEOUT = 20
-def _session() -> requests.Session:
-    s = requests.Session()
-    s.headers.update({
-        "User-Agent": "Mozilla/5.0 (freesound-metadata-preprocess/1.0)",
-        "Accept": "application/json,text/plain,*/*",
-        "Connection": "keep-alive",
-    })
-    return s
-def fetch_json_with_retry(
-    url: str,
-    headers: Dict[str, str],
-    max_retries: int = 6,
-    base_sleep: float = 0.8,
-    timeout: int = DEFAULT_TIMEOUT,
-) -> Dict[str, Any]:
     """
-    GET JSON robuste: gère 429 (rate limit), 5xx et déconnexions.
     """
-    sess = _session()
     last_err = None
-    for attempt in range(max_retries):
         try:
-            resp = sess.get(url, headers=headers, timeout=timeout)
-            # rate limit
-            if resp.status_code == 429:
-                time.sleep(base_sleep * (2 ** attempt))
-                continue
-            # serveur instable
-            if resp.status_code >= 500:
-                time.sleep(base_sleep * (2 ** attempt))
-                continue
-            resp.raise_for_status()
-            return resp.json()
         except Exception as e:
             last_err = e
-            time.sleep(base_sleep * (2 ** attempt))
-    raise RuntimeError(f"Échec requête après {max_retries} essais. Dernière erreur: {last_err}")
-# ----------------------------
-# URL -> sound_id -> API endpoint
-# ----------------------------
-def sound_id_from_freesound_page(url: str) -> int:
-    """
-    Extrait l'ID depuis une URL FreeSound de page son:
-    https://freesound.org/people/.../sounds/<id>/
-    """
-    u = url.strip()
-    u = urllib.parse.unquote(u)
-    m = re.search(r"freesound\.org\/.*\/sounds\/(\d+)\/?", u)
-    if not m:
-        # si l'utilisateur colle juste l'ID (optionnel)
-        if re.fullmatch(r"\d+", u):
-            return int(u)
-        raise ValueError("URL non reconnue. Colle l’URL FreeSound du son (page), ex: .../sounds/844708/")
-    return int(m.group(1))
-def api_url_from_sound_id(sound_id: int) -> str:
-    return f"https://freesound.org/apiv2/sounds/{sound_id}/"
-# ----------------------------
-# Preprocessing helpers
-# ----------------------------
-def clean_tags(tags: Any) -> str:
     """
-    Nettoie tags :
-    - support list ou str
-    - décode %3B etc
-    - split sur ; , espace
-    - lower
-    - supprime doublons
     """
-    if tags is None:
-        return ""
-    if isinstance(tags, list):
-        raw = " ".join([str(t) for t in tags])
-    else:
-        raw = str(tags)
-    raw = urllib.parse.unquote(raw)
-    raw = raw.replace(",", " ").replace(";", " ").replace("|", " ")
-    raw = re.sub(r"\s+", " ", raw).strip().lower()
-    toks = [t for t in raw.split(" ") if t]
-    toks = [t for t in toks if len(t) >= 2]
-    seen = set()
-    out = []
-    for t in toks:
-        if t not in seen:
-            seen.add(t)
-            out.append(t)
-    return " ".join(out)
-def clean_text(x: Any) -> str:
-    if x is None:
-        return ""
-    s = str(x)
-    s = urllib.parse.unquote(s)
-    s = s.lower()
-    s = re.sub(r"\s+", " ", s).strip()
-    return s
-def safe_num(x: Any) -> float:
     try:
-        if x is None:
-            return 0.0
-        return float(x)
-    except Exception:
-        return 0.0
-def safe_len_list(x: Any) -> int:
-    if isinstance(x, list):
-        return len(x)
-    return 0
-# ----------------------------
-# Extract raw features (before)
-# ----------------------------
-RAW_COLUMNS = [
-    "id", "name", "username", "license", "created",
-    "description", "tags",
-    "duration", "samplerate", "bitrate", "bitdepth", "channels",
-    "filesize", "type",
-    "num_downloads", "num_ratings", "avg_rating",
-]
-def extract_raw_df(sound_json: Dict[str, Any]) -> pd.DataFrame:
-    row = {k: sound_json.get(k) for k in RAW_COLUMNS}
-    # certains champs peuvent être absents selon droits/endpoint
-    if "tags" not in row:
-        row["tags"] = sound_json.get("tags")
-    return pd.DataFrame([row])
-# ----------------------------
-# Build "after preprocessing" features
-# ----------------------------
-def build_after_features(raw_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """
-    Retourne:
-    - after_readable_df : colonnes interprétables (nettoyées + dérivées)
-    - after_vector_df   : features vectorisées (TFIDF + numeric scaled) pour "voir" l’embedding
-    """
-    df = raw_df.copy()
-    # Nettoyages
-    df["tags_clean"] = df["tags"].apply(clean_tags)
-    df["name_clean"] = df["name"].apply(clean_text)
-    df["desc_clean"] = df["description"].apply(clean_text)
-    # Features dérivées (lisibles)
-    df["num_tags"] = df["tags"].apply(safe_len_list)
-    df["name_len"] = df["name_clean"].apply(lambda s: len(s))
-    df["desc_len"] = df["desc_clean"].apply(lambda s: len(s))
-    df["text_all"] = (df["name_clean"].fillna("") + " " + df["desc_clean"].fillna("") + " " + df["tags_clean"].fillna("")).str.strip()
-    # Numeric basic
-    numeric_cols = ["duration", "samplerate", "bitrate", "bitdepth", "channels", "filesize", "num_downloads", "num_ratings", "avg_rating",
-                    "num_tags", "name_len", "desc_len"]
-    for c in numeric_cols:
-        df[c] = df[c].apply(safe_num)
-    # 1) after_readable_df (ce que tu veux lire facilement)
-    after_readable_cols = [
-        "id", "type", "license", "created",
-        "name_clean", "tags_clean",
-        "duration", "samplerate", "channels", "filesize",
-        "num_downloads", "num_ratings", "avg_rating",
-        "num_tags", "name_len", "desc_len",
-    ]
-    after_readable_df = df[after_readable_cols].copy()
-    # 2) vectorisation texte (TF-IDF) + standardisation numeric
-    # Sur un seul son, TF-IDF marche quand même (tu verras les termes présents).
-    tfidf = TfidfVectorizer(max_features=60, ngram_range=(1, 2))
-    X_text = tfidf.fit_transform(df["text_all"].fillna(""))
-    # Numeric scaling
-    scaler = StandardScaler()
-    X_num = scaler.fit_transform(df[numeric_cols].to_numpy())
-    # Assemble en DataFrame pour affichage
-    text_feature_names = [f"tfidf:{t}" for t in tfidf.get_feature_names_out()]
-    X_text_dense = X_text.toarray()
-    num_feature_names = [f"num:{c}" for c in numeric_cols]
-    all_features = np.concatenate([X_num, X_text_dense], axis=1)
-    all_names = num_feature_names + text_feature_names
-    after_vector_df = pd.DataFrame(all_features, columns=all_names)
-    return after_readable_df, after_vector_df
-# ----------------------------
-# Main analysis function
-# ----------------------------
-def analyze(url: str, api_key: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    if not url or not url.strip():
-        raise ValueError("Colle l’URL du son FreeSound.")
-    api_key = (api_key or "").strip() or os.environ.get("FREESOUND_API_KEY", "").strip()
-    if not api_key:
-        raise ValueError("Il faut une clé FreeSound API. Mets-la dans le champ 'API key' ou dans FREESOUND_API_KEY.")
-    sound_id = sound_id_from_freesound_page(url)
-    api_url = api_url_from_sound_id(sound_id)
-    headers = {"Authorization": f"Token {api_key}"}
-    sound_json = fetch_json_with_retry(api_url, headers=headers)
-    before_df = extract_raw_df(sound_json)
-    after_readable_df, after_vector_df = build_after_features(before_df)
-    # Bonus: afficher seulement les top features TF-IDF non-nulles
-    # (sur un seul sample, c'est plus clair)
-    nonzero = after_vector_df.loc[0]
-    top = nonzero[nonzero != 0].sort_values(key=lambda s: np.abs(s), ascending=False).head(30)
-    top_df = top.reset_index()
-    top_df.columns = ["feature", "value"]
-    return before_df, after_readable_df, top_df
-# ----------------------------
-# Gradio UI
-# ----------------------------
-with gr.Blocks(title="FreeSound - Prétraitement Metadata") as demo:
-    gr.Markdown("## 🎧 FreeSound – Prétraitement Metadata\n"
-                "Objectif : **visualiser les features AVANT et APRÈS preprocessing**.\n\n"
-                "- Entrée = **URL du son FreeSound** (page)\n"
-                "- Sorties = **tableau avant**, **tableau après**, **top features (vectorisées)**")
-    with gr.Row():
-        url_in = gr.Textbox(
-            label="URL du son FreeSound",
-            placeholder="https://freesound.org/people/.../sounds/844708/",
-            value="",
         )
-    api_in = gr.Textbox(
-        label="API key (Token) FreeSound (optionnel si FREESOUND_API_KEY est set)",
-        placeholder="Colle ta clé ici (Token ...)",
-        type="password",
-        value="",
     )
-    btn = gr.Button("Analyser")
-    gr.Markdown("### Avant (raw metadata)")
-    before_out = gr.Dataframe(interactive=False, wrap=True)
-    gr.Markdown("### Après (nettoyé + features dérivées lisibles)")
-    after_out = gr.Dataframe(interactive=False, wrap=True)
-    gr.Markdown("### Top features après vectorisation (num + TF-IDF) — valeurs non nulles")
-    top_out = gr.Dataframe(interactive=False, wrap=True)
     btn.click(
-        fn=analyze,
-        inputs=[url_in, api_in],
-        outputs=[before_out, after_out, top_out],
     )
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import re
 import time
+import tempfile
+import joblib
 import numpy as np
 import pandas as pd
 import gradio as gr
+import opensmile
+import xgboost as xgb
+import soundfile as sf
+from pydub import AudioSegment
+import freesound
+# =========================
+# CONFIG
+# =========================
+# Accepted duration ranges, in seconds. The prediction pipeline uses the
+# effect-sound model for durations up to MAX_EFFECT and the music model for
+# durations from MIN_MUSIC up to MAX_MUSIC; anything else is rejected.
+MIN_EFFECT = 1
+MAX_EFFECT = 30
+MIN_MUSIC = 31
+MAX_MUSIC = 600
+# Frame rate that audio is converted to (mono) before feature extraction.
+SR_TARGET = 16000
+# Put your FreeSound token in an environment variable:
+# export FREESOUND_API_TOKEN="xxxxx"
+API_TOKEN = os.getenv("FREESOUND_API_TOKEN", "").strip()
+# Models fed with openSMILE features (your own).
+# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files
+# must come from a trusted source. Both models are loaded eagerly at import time.
+MODEL_EFFECT_PATH = "xgb_model_EffectSound.pkl"
+MODEL_MUSIC_PATH = "xgb_model_Music.pkl"
+MODEL_EFFECT = joblib.load(MODEL_EFFECT_PATH)
+MODEL_MUSIC = joblib.load(MODEL_MUSIC_PATH)
+# Display label for each predicted rating class (looked up by class index).
+RATING_DISPLAY_AUDIO = {
+    0: "❌ Informations manquantes",
+    1: "⭐ Faible",
+    2: "⭐⭐ Moyen",
+    3: "⭐⭐⭐ Élevé",
+}
+# Display label for each predicted downloads class (looked up by class index).
+DOWNLOADS_DISPLAY_AUDIO = {
+    0: "⭐ Faible",
+    1: "⭐⭐ Moyen",
+    2: "⭐⭐⭐ Élevé",
+}
+# Shared openSMILE extractor: eGeMAPSv02 feature set at the Functionals level.
+SMILE = opensmile.Smile(
+    feature_set=opensmile.FeatureSet.eGeMAPSv02,
+    feature_level=opensmile.FeatureLevel.Functionals,
+)
+# =========================
+# UI helpers
+# =========================
+# Stylesheet passed to gr.Blocks(css=...). The .card / .badge / .kv / .k /
+# .hint / .err classes are the ones emitted by html_error and html_result.
+CSS = """
+#header-title { font-size: 28px; font-weight: 800; margin-bottom: 6px; }
+#header-sub { color:#444; margin-top:0; }
+.card {
+  border: 1px solid #e5e7eb; border-radius: 14px; padding: 14px 14px;
+  background: #fff; box-shadow: 0 3px 10px rgba(0,0,0,0.04);
+}
+.badge { display:inline-block; padding:6px 10px; border-radius:999px; font-weight:700; font-size:12px; }
+.badge.music { background:#eef2ff; color:#3730a3; }
+.badge.fx { background:#ecfeff; color:#155e75; }
+.kv { margin:6px 0; }
+.k { font-weight:700; }
+.hint { color:#6b7280; font-size:12px; margin-top:8px; }
+.err { color:#991b1b; font-weight:700; }
+"""
+def html_error(title: str, msg: str) -> str:
+    """Return an HTML "card" showing an error title and a detail message.
+
+    Both arguments are inserted verbatim, so ``msg`` may contain markup.
+    """
+    rows = [
+        "",
+        '<div class="card">',
+        f'  <div class="err">❌ {title}</div>',
+        f'  <div style="margin-top:8px">{msg}</div>',
+        "</div>",
+        "",
+    ]
+    # Leading/trailing empty rows reproduce the newline right after the
+    # opening quotes and right before the closing quotes of the template.
+    return "\n".join(rows)
+def html_result(badge: str, duration: float, rating_text: str, downloads_text: str, extra_html: str = "") -> str:
+    """Return the HTML result card for a prediction.
+
+    The badge gets the ``music`` CSS class when its text contains "Musique",
+    otherwise ``fx``. ``extra_html`` is appended verbatim inside the card.
+    """
+    if "Musique" in badge:
+        badge_class = "music"
+    else:
+        badge_class = "fx"
+    rows = [
+        "",
+        '<div class="card">',
+        f'  <div class="badge {badge_class}">{badge}</div>',
+        f'  <div class="kv"><span class="k">Durée :</span> {duration:.2f}s</div>',
+        f'  <div class="kv"><span class="k">Rating (classe) :</span> {rating_text}</div>',
+        f'  <div class="kv"><span class="k">Downloads (classe) :</span> {downloads_text}</div>',
+        f"  {extra_html}",
+        "</div>",
+        "",
+    ]
+    return "\n".join(rows)
+def interpret_results(avg_class: int, dl_class: int) -> str:
+    """Return an HTML sentence interpreting the two predicted classes.
+
+    Args:
+        avg_class: predicted rating class; 0 means the rating is missing.
+        dl_class: predicted downloads class.
+
+    Returns:
+        An HTML fragment. For ``avg_class == 0`` a fixed "no evaluation
+        possible" message; otherwise the estimated potential and a short
+        explanation. Any class pair that is not listed in the table falls
+        back to the lowest verdict.
+    """
+    if avg_class == 0:
+        return (
+            "ℹ️ <b>Interprétation</b> :<br>"
+            "Aucune évaluation possible (rating manquant)."
+        )
+    # (rating class, downloads class) -> (potential, detail)
+    verdicts = {
+        (3, 2): ("très fort", "contenu de haute qualité et très populaire."),
+        (3, 1): ("fort", "contenu bien apprécié, en croissance."),
+        (3, 0): ("prometteur", "bonne qualité mais faible visibilité (peut gagner en popularité)."),
+        (2, 2): ("modéré à fort", "populaire mais qualité perçue moyenne."),
+        (2, 1): ("modéré", "profil standard, popularité stable."),
+        (2, 0): ("limité", "engagement faible, diffusion limitée."),
+        (1, 2): ("contradictoire", "très téléchargé mais peu apprécié (usage pratique possible)."),
+        (1, 1): ("faible", "peu attractif pour les utilisateurs."),
+    }
+    potentiel, detail = verdicts.get(
+        (avg_class, dl_class),
+        ("très faible", "faible intérêt global."),
+    )
+    return f"<b>Interprétation</b> :<br>Potentiel estimé : <b>{potentiel}</b> — {detail}"
+# =========================
+# FreeSound helpers
+# =========================
+def extract_freesound_id(url: str) -> int:
+    """Extract the numeric sound ID from a FreeSound URL (or a bare ID).
+
+    Accepted forms:
+      * short links:      https://freesound.org/s/123456/
+      * sound pages:      https://freesound.org/people/<user>/sounds/123456/
+      * anything whose last path segment is numeric, including a bare "123456"
+
+    Query strings and fragments are ignored.
+
+    Raises:
+        ValueError: if the input is empty or no ID can be found.
+    """
+    if not url or not url.strip():
+        raise ValueError("URL vide")
+    cleaned = url.strip()
+    # Explicit match first, so a trailing segment or query string after the
+    # ID (".../sounds/123/?utm=x") does not defeat the extraction.
+    m = re.search(r"/(?:s|sounds)/(\d+)", cleaned)
+    if m:
+        return int(m.group(1))
+    # fallback: last numeric path segment, with query/fragment stripped
+    path = re.split(r"[?#]", cleaned, maxsplit=1)[0]
+    parts = [p for p in path.rstrip("/").split("/") if p]
+    if not parts or not parts[-1].isdecimal():
+        raise ValueError("Impossible d'extraire l'ID depuis l'URL")
+    return int(parts[-1])
+def get_fs_client() -> freesound.FreesoundClient:
+    """Return a FreesoundClient authenticated with the module-level API_TOKEN.
+
+    Raises:
+        RuntimeError: if API_TOKEN is empty.
+    """
+    if API_TOKEN:
+        fs_client = freesound.FreesoundClient()
+        fs_client.set_token(API_TOKEN, "token")
+        return fs_client
+    raise RuntimeError(
+        "Token FreeSound manquant. Mets-le dans FREESOUND_API_TOKEN (variable d'environnement)."
+    )
+def download_preview_with_retry(client: freesound.FreesoundClient, sound_id: int, tries: int = 4, sleep_base: float = 1.0):
     """
+    Download the FreeSound preview of ``sound_id`` into a temporary .mp3 file.
+
+    Retries with exponential backoff (``sleep_base * 2**attempt`` seconds),
+    which helps when FreeSound drops the connection or rate-limits.
+
+    Returns:
+        (path, sound): path of the temporary file (the caller is responsible
+        for deleting it) and the sound object returned by the client.
+
+    Raises:
+        RuntimeError: if every attempt failed; the message carries the last error.
     """
     last_err = None
+    for i in range(tries):
+        tmp_path = None
         try:
+            snd = client.get_sound(sound_id)
+            # force an mp3 (preview) -> pydub can read it (if ffmpeg is available)
+            tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
+            tmp.close()
+            tmp_path = tmp.name
+            # retrieve_preview expects (directory, file name), not a full path
+            snd.retrieve_preview(os.path.dirname(tmp_path), os.path.basename(tmp_path))
+            return tmp_path, snd
         except Exception as e:
             last_err = e
+            # do not leave one orphaned temp file behind per failed attempt
+            if tmp_path is not None:
+                try:
+                    os.remove(tmp_path)
+                except OSError:
+                    pass
+            # no point in sleeping after the final attempt
+            if i < tries - 1:
+                time.sleep(sleep_base * (2 ** i))
+    raise RuntimeError(f"Échec téléchargement preview après {tries} essais: {last_err}")
+# =========================
+# Audio helpers
+# =========================
+def get_duration_seconds(filepath: str) -> float:
+    """Return the duration of an audio file in seconds.
+
+    Files with an ``.mp3`` extension are decoded with pydub; every other
+    extension is opened with soundfile (frame count / sample rate).
+    """
+    _, suffix = os.path.splitext(filepath)
+    if suffix.lower() != ".mp3":
+        with sf.SoundFile(filepath) as handle:
+            return len(handle) / handle.samplerate
+    segment = AudioSegment.from_file(filepath)
+    # pydub segment length is in milliseconds
+    return len(segment) / 1000.0
+def to_wav_16k_mono(filepath: str) -> str:
+    """Return the path of a mono WAV at SR_TARGET Hz for ``filepath``.
+
+    A ``.wav`` input that is already mono at SR_TARGET is returned unchanged.
+    Otherwise the audio is converted with pydub into a new temporary ``.wav``
+    file, which is not deleted here.
+    """
+    if os.path.splitext(filepath)[1].lower() == ".wav":
+        try:
+            with sf.SoundFile(filepath) as wav:
+                already_ok = wav.samplerate == SR_TARGET and wav.channels == 1
+        except Exception:
+            # best effort: an unreadable header just means "convert it"
+            already_ok = False
+        if already_ok:
+            return filepath
+    converted = AudioSegment.from_file(filepath).set_channels(1).set_frame_rate(SR_TARGET)
+    out_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+    out_file.close()
+    out_path = out_file.name
+    converted.export(out_path, format="wav")
+    return out_path
+def extract_opensmile_features(filepath: str) -> pd.DataFrame:
+    """Extract openSMILE features for an audio file.
+
+    The file is first converted to a mono WAV at SR_TARGET, then processed by
+    the shared SMILE extractor. Only numeric columns are kept and the index is
+    reset to a plain RangeIndex.
+
+    Returns:
+        DataFrame of numeric features as produced by ``SMILE.process_file``.
+    """
+    wav_path = to_wav_16k_mono(filepath)
+    try:
+        feats = SMILE.process_file(wav_path)
+    finally:
+        # to_wav_16k_mono creates a temp file whenever it had to convert;
+        # remove it so repeated predictions do not fill the temp directory.
+        if wav_path != filepath:
+            try:
+                os.remove(wav_path)
+            except OSError:
+                pass
+    feats = feats.select_dtypes(include=[np.number]).reset_index(drop=True)
+    return feats
+def expected_feature_names(model) -> list[str]:
+    """Return the feature names the model expects, in order.
+
+    For a multi-output wrapper (an object with ``estimators_``) the first
+    sub-estimator is inspected before the wrapper itself. On each candidate,
+    ``feature_names_in_`` is preferred, then the non-empty feature names of
+    its booster.
+
+    Raises:
+        RuntimeError: if no candidate exposes feature names.
+    """
+    def _names_of(candidate):
+        # None means "this object does not tell us"; an empty
+        # feature_names_in_ is still returned as-is.
+        if hasattr(candidate, "feature_names_in_"):
+            return list(candidate.feature_names_in_)
+        if hasattr(candidate, "get_booster"):
+            booster_names = candidate.get_booster().feature_names
+            if booster_names:
+                return list(booster_names)
+        return None
+    candidates = []
+    if hasattr(model, "estimators_"):  # multioutput wrapper
+        candidates.append(model.estimators_[0])
+    candidates.append(model)
+    for candidate in candidates:
+        found = _names_of(candidate)
+        if found is not None:
+            return found
+    raise RuntimeError("Impossible de récupérer la liste des features attendues par le modèle.")
+def predict_with_dmatrix(model, X_df: pd.DataFrame) -> np.ndarray:
     """
+    Predict through the underlying booster(s) with an explicit DMatrix.
+
+    Passing the feature names to the DMatrix avoids the
+    'data did not contain feature names' error. Multi-output wrappers
+    (objects with ``estimators_``) are supported: each sub-estimator gives one
+    output column.
+
+    Returns:
+        For a multi-output wrapper, an array of shape (n_rows, n_estimators).
+        When a sub-estimator's booster returns one score per class (a 2-D
+        array), it is reduced to the arg-max class index so the column holds
+        class labels instead of flattened probabilities.
+        For a single model, the raw booster output reshaped to one row.
     """
+    # Build the DMatrix once; it is identical for every estimator.
+    dm = xgb.DMatrix(X_df.values, feature_names=list(X_df.columns))
+    if hasattr(model, "estimators_"):
+        preds = []
+        for est in model.estimators_:
+            booster = est.get_booster() if hasattr(est, "get_booster") else est
+            p = np.asarray(booster.predict(dm))
+            if p.ndim == 2 and p.shape[1] > 1:
+                # per-class scores (e.g. multi:softprob) -> class index per row
+                p = p.argmax(axis=1)
+            preds.append(p.reshape(-1))
+        return np.column_stack(preds)
+    booster = model.get_booster() if hasattr(model, "get_booster") else model
+    p = booster.predict(dm)
+    # NOTE(review): the raw output is kept as-is here; if the single model is
+    # a classifier returning probabilities, callers indexing [0, 0] / [0, 1]
+    # read probabilities, not classes. Confirm against the pickled model.
+    return np.asarray(p).reshape(1, -1)
+# =========================
+# Main pipeline (URL -> download -> features -> align -> predict)
+# =========================
+def _escape_exc(err: Exception) -> str:
+    """Return the exception text escaped for embedding in HTML."""
+    import html  # function-scope import: the module header does not import html
+    return html.escape(str(err), quote=False)
+def _error_result(title: str, msg: str, frames=None):
+    """Build the 4-tuple the UI expects: an error card plus three DataFrames."""
+    if frames is None:
+        frames = (pd.DataFrame(), pd.DataFrame(), pd.DataFrame())
+    return (html_error(title, msg), *frames)
+def _remove_quietly(path: str) -> None:
+    """Delete a temporary file; a file that is already gone is not an error."""
+    try:
+        os.remove(path)
+    except OSError:
+        pass
+def _predict_from_audio(sound_id: int, audio_path: str, snd):
+    """Run duration check, feature extraction, alignment and prediction on a downloaded preview."""
+    # 3) duration + model select
+    try:
+        duration = float(getattr(snd, "duration", None) or 0.0)
+        if duration <= 0:
+            # no usable duration in the metadata -> measure the file itself
+            duration = get_duration_seconds(audio_path)
+    except Exception as e:
+        return _error_result(
+            "Audio illisible",
+            f"Impossible de lire la durée.<br>Détail : <code>{_escape_exc(e)}</code>",
+        )
+    ranges_html = (
+        f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
+        f"Plages acceptées :<br>"
+        f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
+        f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
+    )
+    if duration < MIN_EFFECT:
+        return _error_result("Audio trop court", ranges_html)
+    if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
+        return _error_result("Audio hors plage", ranges_html)
+    if duration <= MAX_EFFECT:
+        badge = "🔊 Effet sonore (URL FreeSound → openSMILE)"
+        model = MODEL_EFFECT
+    else:
+        badge = "🎵 Musique (URL FreeSound → openSMILE)"
+        model = MODEL_MUSIC
+    # 4) extract openSMILE features (BEFORE alignment)
+    try:
+        X_before = extract_opensmile_features(audio_path)
+    except Exception as e:
+        return _error_result(
+            "Extraction openSMILE échouée",
+            f"Détail : <code>{_escape_exc(e)}</code>",
+        )
+    # 5) align features (AFTER): reorder to the model's columns, fill gaps with 0
+    try:
+        expected = expected_feature_names(model)
+        before_cols = list(X_before.columns)
+        X_after = X_before.reindex(columns=expected, fill_value=0)
+        missing_added = [c for c in expected if c not in before_cols]
+        extras_dropped = [c for c in before_cols if c not in expected]
+        diff_df = pd.DataFrame({
+            "missing_added_(filled_0)": pd.Series(missing_added, dtype="object"),
+            "extras_dropped": pd.Series(extras_dropped, dtype="object"),
+        })
+    except Exception as e:
+        return _error_result(
+            "Alignement des features échoué",
+            f"Détail : <code>{_escape_exc(e)}</code>",
+        )
+    # 6) predict
+    try:
+        y = predict_with_dmatrix(model, X_after)
+        y = np.array(y)
+        avg_class = int(y[0, 0])
+        dl_class = int(y[0, 1])
+    except Exception as e:
+        # keep the feature tables visible so the failure can be diagnosed
+        return _error_result(
+            "Prédiction échouée",
+            f"Détail : <code>{_escape_exc(e)}</code>",
+            frames=(X_before, X_after, diff_df),
+        )
+    rating_text = RATING_DISPLAY_AUDIO.get(avg_class, str(avg_class))
+    downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, str(dl_class))
+    conclusion = interpret_results(avg_class, dl_class)
+    extra = f"""
+<div class="hint">ID FreeSound : <b>{sound_id}</b> · Preview téléchargé automatiquement</div>
+<div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
+{conclusion}
+</div>
+"""
+    return html_result(badge, duration, rating_text, downloads_text, extra_html=extra), X_before, X_after, diff_df
+def predict_from_freesound_url(url: str):
+    """Predict rating/downloads classes for the sound behind a FreeSound URL.
+
+    Pipeline: parse the URL -> download the preview -> pick the model from the
+    duration -> extract openSMILE features -> align them to the model's
+    columns -> predict.
+
+    Returns:
+        A 4-tuple (html, features_before, features_after, diff) matching the
+        four Gradio outputs. Failures never raise: they are reported as an
+        HTML error card, with empty DataFrames unless the failure happened at
+        the prediction step.
+    """
+    # 1) parse URL
+    try:
+        sound_id = extract_freesound_id(url)
+    except Exception as e:
+        return _error_result("URL invalide", _escape_exc(e))
+    # 2) API + download preview
+    try:
+        client = get_fs_client()
+        audio_path, snd = download_preview_with_retry(client, sound_id)
+    except Exception as e:
+        return _error_result("Erreur FreeSound", f"Détail : <code>{_escape_exc(e)}</code>")
+    # 3-6) everything that needs the downloaded file; always delete it afterwards
+    try:
+        return _predict_from_audio(sound_id, audio_path, snd)
+    finally:
+        _remove_quietly(audio_path)
+# =========================
+# UI (merged: a single URL input)
+# =========================
+# Layout: URL box + button on the left, HTML result card on the right, then
+# three tables (raw features, model-aligned features, alignment diff).
+theme = gr.themes.Soft()
+with gr.Blocks(title="Prédiction popularité — URL FreeSound", css=CSS, theme=theme) as demo:
+    # Header; the accepted duration ranges are interpolated from the config constants.
+    gr.HTML(
+        f"""
+<div id="header-title">Prédiction de popularité — URL FreeSound</div>
+<p id="header-sub">
+✅ Entrée = URL FreeSound → téléchargement preview → openSMILE → sélection auto du modèle → prédiction<br>
+<b>Durées acceptées :</b> 🔊 Effet sonore {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 Musique {MIN_MUSIC}–{MAX_MUSIC}s
+</p>
+"""
     )
+    with gr.Row():
+        with gr.Column(scale=1):
+            url_in = gr.Textbox(
+                label="URL FreeSound",
+                placeholder="https://freesound.org/s/123456/",
+            )
+            btn = gr.Button("🚀 Prédire depuis l’URL", variant="primary")
+        with gr.Column(scale=1):
+            out_html = gr.HTML(label="Résultat")
+    gr.Markdown("## Features")
+    # NOTE(review): `max_rows` may not be accepted by newer Gradio releases —
+    # confirm against the Gradio version pinned for this Space.
+    with gr.Row():
+        feat_before = gr.Dataframe(label="Features AVANT (openSMILE raw)", wrap=True, max_rows=20)
+        feat_after = gr.Dataframe(label="Features APRÈS (alignées modèle)", wrap=True, max_rows=20)
+    diff_out = gr.Dataframe(label="Diff (manquantes ajoutées / extras supprimées)", wrap=True, max_rows=50)
+    # The callback returns four values, one per output component, in this order.
     btn.click(
+        predict_from_freesound_url,
+        inputs=[url_in],
+        outputs=[out_html, feat_before, feat_after, diff_out],
     )
+# Launched unconditionally at import time (no __main__ guard).
+demo.launch()

xgb_avg_rating_effectsound_label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0d4f212926af0fb0d1d4ddc2a77f10e62ba4c0b87131297514ae697d979d29
+size 508

xgb_avg_rating_effectsound_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af8cfbf2e681e80ea641e30b40ac280999e9a72e9bd95b28f33755771ccd51e5
+size 10219909

xgb_avg_rating_music_features.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa0509103f06306aa1d21f074fc89ee08d90d9cf4b0c2b3a9a5b3c4436d4c5af
+size 631

xgb_avg_rating_music_label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0d4f212926af0fb0d1d4ddc2a77f10e62ba4c0b87131297514ae697d979d29
+size 508

xgb_avg_rating_music_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4e996630b373e8594079d5ae3bf2a52707f8f163aedd6c6330416b9a056b8e9
+size 7046656

xgb_model_EffectSound.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f41317a1a2ac6916e2fc40a8a43097021520ea0de78632149a30ee946b1c697a
+size 16161360

xgb_model_Music.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89dc204e1e774da5b44df74d25d654bce417e4d7304b3bf2efde901dccaf2919
+size 16904032

xgb_num_downloads_effectsound_features.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa0509103f06306aa1d21f074fc89ee08d90d9cf4b0c2b3a9a5b3c4436d4c5af
+size 631

xgb_num_downloads_effectsound_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a33ae31fc84dd8dc080d75f61e4016690fa2730cdd9b7dbb9720d7eb778adca
+size 8595460

xgb_num_downloads_music_features.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa0509103f06306aa1d21f074fc89ee08d90d9cf4b0c2b3a9a5b3c4436d4c5af
+size 631

xgb_num_downloads_music_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b8abbccd5ee7f195386936b8447c7d9c5b336f6f8b5740563d6803389a2c45a
+size 8754226