Spaces:

ikramelhadi
/

freesound-popularity-interface

Sleeping

App Files Files Community

IKRAMELHADI committed 16 days ago

Commit

f11f89f

1 Parent(s): d2ffb31

UI FR Gradio6

Browse files

Files changed (1) hide show

app.py +278 -165

app.py CHANGED Viewed

@@ -8,45 +8,145 @@ import joblib
 import soundfile as sf
 from pydub import AudioSegment
 import opensmile
 import xgboost as xgb
 # =========================
-# CONFIG
 # =========================
-SR_TARGET = 16000
 MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
 MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
 MODEL_EFFECT_PATH = "xgb_model_EffectSound.pkl"
 MODEL_MUSIC_PATH = "xgb_model_Music.pkl"
-SMILE = opensmile.Smile(
-    feature_set=opensmile.FeatureSet.eGeMAPSv02,
-    feature_level=opensmile.FeatureLevel.Functionals,
-)
 MODEL_EFFECT = joblib.load(MODEL_EFFECT_PATH)
 MODEL_MUSIC = joblib.load(MODEL_MUSIC_PATH)
-# Classes -> affichage (comme tu veux)
-RATING_DISPLAY = {
     0: "Informations manquantes",
     1: "⭐ Faible",
     2: "⭐⭐ Moyen",
     3: "⭐⭐⭐ Élevé",
 }
-DOWNLOADS_DISPLAY = {
     0: "Faible",
     1: "Moyen",
     2: "Élevé",
 }
-# =========================
-# AUDIO HELPERS
-# =========================
 def get_duration_seconds(filepath):
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".mp3":
@@ -82,65 +182,20 @@ def extract_opensmile_features(filepath):
     return feats
-# =========================
-# SAFE MULTIOUTPUT PREDICT
-# =========================
-def predict_multioutput(model, X_df):
-    preds = []
-    for est in model.estimators_:
-        try:
             p = est.predict(X_df)
-        except Exception:
-            booster = est.get_booster()
-            dm = xgb.DMatrix(X_df, feature_names=list(X_df.columns))
-            p = booster.predict(dm)
-        preds.append(np.asarray(p).reshape(-1))
-    return np.column_stack(preds)
-# =========================
-# UI HELPERS (HTML)
-# =========================
-def html_error(title, body_html):
-    return f"""
-<div class="card card-error">
-  <div class="card-title">❌ {title}</div>
-  <div class="card-body">{body_html}</div>
-</div>
-""".strip()
-def html_result(audio_type, duration, rating_text, downloads_text):
-    badge = "🎵 Musique" if audio_type == "Music" else "🔊 Effet sonore"
-    return f"""
-<div class="card">
-  <div class="badges">
-    <span class="badge badge-type">{badge}</span>
-    <span class="badge badge-time">⏱️ {duration:.2f} s</span>
-  </div>
-  <div class="grid">
-    <div class="box">
-      <div class="box-title">📈 Note moyenne (popularité)</div>
-      <div class="box-value">{rating_text}</div>
-    </div>
-    <div class="box">
-      <div class="box-title">⬇️ Téléchargements (popularité)</div>
-      <div class="box-value">{downloads_text}</div>
-    </div>
-  </div>
-  <div class="hint">
-    Résultats affichés en <b>niveaux</b> (faible / moyen / élevé), pas en valeurs exactes.
-  </div>
-</div>
-""".strip()
-# =========================
-# MAIN PREDICTION
-# =========================
-def predict_popularity(audio_file):
     if audio_file is None:
         return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")
@@ -150,7 +205,7 @@ def predict_popularity(audio_file):
     except Exception as e:
         return html_error("Audio illisible", f"Impossible de lire l'audio.<br>Détail : <code>{e}</code>")
-    # Vérif plages
     if duration < MIN_EFFECT:
         return html_error(
             "Audio trop court",
@@ -171,10 +226,10 @@ def predict_popularity(audio_file):
     # Type + modèle
     if duration <= MAX_EFFECT:
-        audio_type = "SoundEffect"
         model = MODEL_EFFECT
     else:
-        audio_type = "Music"
         model = MODEL_MUSIC
     # Features openSMILE
@@ -183,121 +238,179 @@ def predict_popularity(audio_file):
     except Exception as e:
         return html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>")
-    # Align colonnes
     try:
-        expected = model.estimators_[0].feature_names_in_
-        X = X.reindex(columns=expected, fill_value=0)
     except Exception as e:
         return html_error("Alignement des features échoué", f"Détail : <code>{e}</code>")
     # Predict
     try:
-        y = predict_multioutput(model, X)
     except Exception as e:
         return html_error("Prédiction échouée", f"Détail : <code>{e}</code>")
-    avg_class = int(y[0, 0])
-    dl_class = int(y[0, 1])
-    rating_text = RATING_DISPLAY.get(avg_class, "Inconnu")
-    downloads_text = DOWNLOADS_DISPLAY.get(dl_class, "Inconnu")
-    return html_result(audio_type, duration, rating_text, downloads_text)
-# =========================
-# UI
-# =========================
-theme = gr.themes.Soft()
-css = """
-/* Layout */
-.card {
-  border: 1px solid #e5e7eb;
-  background: #ffffff;
-  padding: 16px;
-  border-radius: 16px;
-}
-.card-error{
-  border-color: #fca5a5;
-  background: #fff1f2;
-}
-.card-title{
-  font-weight: 800;
-  margin-bottom: 8px;
-}
-.card-body{
-  color: #7f1d1d;
-  line-height: 1.45;
-}
-.badges{
-  display:flex;
-  gap:10px;
-  flex-wrap:wrap;
-  margin-bottom:12px;
-}
-.badge{
-  padding:6px 10px;
-  border-radius:999px;
-  font-weight:700;
-  font-size: 13px;
-  border: 1px solid #e5e7eb;
-}
-.badge-type{ background:#eef2ff; color:#3730a3;}
-.badge-time{ background:#ecfeff; color:#155e75;}
-.grid{
-  display:grid;
-  grid-template-columns: 1fr;
-  gap:10px;
-}
-.box{
-  border:1px solid #e5e7eb;
-  border-radius:14px;
-  padding:12px;
-  background:#fafafa;
-}
-.box-title{ font-weight:800; margin-bottom:4px; }
-.box-value{ font-size:18px; }
-.hint{
-  margin-top:10px;
-  color:#6b7280;
-  font-size:12px;
-}
-#header-title { font-size: 28px; font-weight: 900; margin-bottom: 6px; }
-#header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
-"""
-with gr.Blocks(title="Prédiction de popularité audio") as demo:
     gr.HTML(
         f"""
-<div id="header-title">🎧 Prédiction de popularité audio</div>
 <p id="header-sub">
-Importez un audio. Si la durée est valide, nous extrayons des caractéristiques acoustiques
-(openSMILE eGeMAPS) puis nous prédisons des <b>niveaux de popularité</b> pour la note moyenne et les téléchargements.
-<br><br>
 <b>Durées acceptées :</b> 🔊 Effet sonore {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 Musique {MIN_MUSIC}–{MAX_MUSIC}s
 </p>
 """
     )
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 1) Importer un fichier")
-            audio_in = gr.Audio(type="filepath", label="Fichier audio")
-            btn = gr.Button("🚀 Lancer la prédiction", variant="primary")
-            gr.Markdown(
-                """
-**Conseil :** utilisez un extrait clair (bonne qualité audio) pour une meilleure extraction de features.
-"""
-            )
-        with gr.Column(scale=1):
-            gr.Markdown("### 2) Résultat")
-            out = gr.HTML(value="")
-    btn.click(predict_popularity, inputs=audio_in, outputs=out)
-demo.launch(theme=theme, css=css)

 import soundfile as sf
 from pydub import AudioSegment
 import opensmile
+import freesound
 import xgboost as xgb
 # =========================
+# RÈGLES DURÉE
 # =========================
 MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
 MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
+SR_TARGET = 16000
+# =========================
+# UI (CSS)
+# =========================
+CSS = """
+.card {
+  border: 1px solid #e5e7eb;
+  background: #ffffff;
+  padding: 16px;
+  border-radius: 16px;
+}
+.card-error{
+  border-color: #fca5a5;
+  background: #fff1f2;
+}
+.card-title{
+  font-weight: 950;
+  margin-bottom: 8px;
+}
+.badges{
+  display:flex;
+  gap:10px;
+  flex-wrap:wrap;
+  margin-bottom:12px;
+}
+.badge{
+  padding:6px 10px;
+  border-radius:999px;
+  font-weight:900;
+  font-size: 13px;
+  border: 1px solid #e5e7eb;
+}
+.badge-type{ background:#eef2ff; color:#3730a3;}
+.badge-time{ background:#ecfeff; color:#155e75;}
+.grid{
+  display:grid;
+  grid-template-columns: 1fr;
+  gap:10px;
+}
+.box{
+  border:1px solid #e5e7eb;
+  border-radius:14px;
+  padding:12px;
+  background:#fafafa;
+}
+.box-title{ font-weight:900; margin-bottom:4px; }
+.box-value{ font-size:18px; font-weight:800; }
+.hint{
+  margin-top:10px;
+  color:#6b7280;
+  font-size:12px;
+}
+#header-title { font-size: 28px; font-weight: 950; margin-bottom: 6px; }
+#header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
+"""
def html_error(title, body_html):
    """Build the HTML card displayed when a prediction cannot be produced.

    Args:
        title: Short plain-text heading. It is HTML-escaped before being
            inserted, so characters such as ``<`` or ``&`` are shown
            literally instead of being parsed as markup.
        body_html: Ready-made HTML fragment for the card body. It is inserted
            verbatim, so the caller is responsible for its markup.

    Returns:
        The card markup as a string, stripped of surrounding whitespace.
    """
    # Local import keeps this block usable without touching the file's
    # import section.
    from html import escape

    safe_title = escape(str(title))
    return f"""
<div class="card card-error">
  <div class="card-title">❌ {safe_title}</div>
  <div>{body_html}</div>
</div>
""".strip()
def html_result(badge_text, duration, rating_text, downloads_text, extra_html=""):
    """Build the HTML card that presents a successful prediction.

    The three ``*_text`` arguments are plain text and are HTML-escaped before
    insertion; ``extra_html`` is a ready-made HTML fragment and is inserted
    verbatim.

    Args:
        badge_text: Label of the first badge (audio type / source).
        duration: Audio duration in seconds; rendered with two decimals.
        rating_text: Predicted level for the average rating.
        downloads_text: Predicted level for the number of downloads.
        extra_html: Optional HTML fragment placed between the result grid and
            the hint line.

    Returns:
        The card markup as a string, stripped of surrounding whitespace.
    """
    # Local import keeps this block usable without touching the file's
    # import section.
    from html import escape

    badge = escape(str(badge_text))
    rating = escape(str(rating_text))
    downloads = escape(str(downloads_text))
    return f"""
<div class="card">
  <div class="badges">
    <span class="badge badge-type">{badge}</span>
    <span class="badge badge-time">⏱️ {duration:.2f} s</span>
  </div>
  <div class="grid">
    <div class="box">
      <div class="box-title">📈 Popularité de la note moyenne</div>
      <div class="box-value">{rating}</div>
    </div>
    <div class="box">
      <div class="box-title">⬇️ Popularité des téléchargements</div>
      <div class="box-value">{downloads}</div>
    </div>
  </div>
  {extra_html}
  <div class="hint">
    Résultats affichés en <b>niveaux</b> (faible / moyen / élevé), pas en valeurs exactes.
  </div>
</div>
""".strip()
+# ============================================================
+# PARTIE A — Upload audio → openSMILE → modèles (toi)
+# ============================================================
+# Tes modèles upload
 MODEL_EFFECT_PATH = "xgb_model_EffectSound.pkl"
 MODEL_MUSIC_PATH = "xgb_model_Music.pkl"
 MODEL_EFFECT = joblib.load(MODEL_EFFECT_PATH)
 MODEL_MUSIC = joblib.load(MODEL_MUSIC_PATH)
+# Mapping (sans very high)
+RATING_DISPLAY_AUDIO = {
     0: "Informations manquantes",
     1: "⭐ Faible",
     2: "⭐⭐ Moyen",
     3: "⭐⭐⭐ Élevé",
 }
+DOWNLOADS_DISPLAY_AUDIO = {
     0: "Faible",
     1: "Moyen",
     2: "Élevé",
 }
+SMILE = opensmile.Smile(
+    feature_set=opensmile.FeatureSet.eGeMAPSv02,
+    feature_level=opensmile.FeatureLevel.Functionals,
+)
 def get_duration_seconds(filepath):
     ext = os.path.splitext(filepath)[1].lower()
     if ext == ".mp3":
     return feats
def predict_multioutput_safely(model, X_df):
    """Predict with either a multi-output wrapper or a plain estimator.

    When ``model`` exposes ``estimators_``, every sub-estimator is called on
    ``X_df`` itself (the input is passed through unchanged, so its column
    names stay attached) and the flattened predictions are stacked as columns.
    Otherwise the call is delegated to ``model.predict``.

    Args:
        model: Object with an ``estimators_`` iterable of predictors, or any
            object with a ``predict`` method.
        X_df: Feature table handed to each ``predict`` call as-is.

    Returns:
        A 2-D array with one column per sub-estimator when ``estimators_`` is
        present; otherwise whatever ``model.predict(X_df)`` returns.
    """
    if not hasattr(model, "estimators_"):
        return model.predict(X_df)

    # reshape(-1) flattens each estimator's output to one column's worth of
    # values before stacking.
    columns = [
        np.asarray(est.predict(X_df)).reshape(-1)
        for est in model.estimators_
    ]
    return np.column_stack(columns)
+def predict_from_uploaded_audio(audio_file):
     if audio_file is None:
         return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")
     except Exception as e:
         return html_error("Audio illisible", f"Impossible de lire l'audio.<br>Détail : <code>{e}</code>")
+    # Vérif durées
     if duration < MIN_EFFECT:
         return html_error(
             "Audio trop court",
     # Type + modèle
     if duration <= MAX_EFFECT:
+        badge = "🔊 Effet sonore (upload)"
         model = MODEL_EFFECT
     else:
+        badge = "🎵 Musique (upload)"
         model = MODEL_MUSIC
     # Features openSMILE
     except Exception as e:
         return html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>")
+    # Align features
     try:
+        expected = model.estimators_[0].feature_names_in_ if hasattr(model, "estimators_") else model.feature_names_in_
+        X = X.reindex(columns=list(expected), fill_value=0)
     except Exception as e:
         return html_error("Alignement des features échoué", f"Détail : <code>{e}</code>")
     # Predict
     try:
+        y = predict_multioutput_safely(model, X)
     except Exception as e:
         return html_error("Prédiction échouée", f"Détail : <code>{e}</code>")
+    y = np.array(y)
+    if y.ndim == 2:
+        avg_class = int(y[0, 0])
+        dl_class = int(y[0, 1])
+    else:
+        avg_class = int(y[0])
+        dl_class = int(y[1])
+    rating_text = RATING_DISPLAY_AUDIO.get(avg_class, "Inconnu")
+    downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, "Inconnu")
+    return html_result(badge, duration, rating_text, downloads_text)
+# ============================================================
+# PARTIE B — URL FreeSound → API → modèles (collègue)
+# ============================================================
# FreeSound API token. It is read from the environment so that the real token
# never has to be committed to the repository; the original placeholder is
# kept as the fallback, so behaviour is unchanged when the variable is unset.
# NOTE(review): the variable name FREESOUND_API_TOKEN is a suggestion — align
# it with the secret actually configured for this Space.
API_TOKEN = os.environ.get("FREESOUND_API_TOKEN", "A ECRIRE")
fs_client = freesound.FreesoundClient()
fs_client.set_token(API_TOKEN, "token")

# Colleague's models for the FreeSound-URL mode. Each target has a model file
# and a matching "features" file; the average-rating targets also have a
# label encoder.
# NOTE: joblib.load unpickles arbitrary objects — only load files that ship
# with the application, never user-supplied ones.
# Music
xgb_music_num = joblib.load("xgb_num_downloads_music_model.pkl")
xgb_music_feat_num = joblib.load("xgb_num_downloads_music_features.pkl")
xgb_music_avg = joblib.load("xgb_avg_rating_music_model.pkl")
xgb_music_feat_avg = joblib.load("xgb_avg_rating_music_features.pkl")
le_music_avg = joblib.load("xgb_avg_rating_music_label_encoder.pkl")
# Sound effects
xgb_effect_num = joblib.load("xgb_num_downloads_effectsound_model.pkl")
xgb_effect_feat_num = joblib.load("xgb_num_downloads_effectsound_features.pkl")
xgb_effect_avg = joblib.load("xgb_avg_rating_effectsound_model.pkl")
xgb_effect_feat_avg = joblib.load("xgb_avg_rating_effectsound_features.pkl")
le_effect_avg = joblib.load("xgb_avg_rating_effectsound_label_encoder.pkl")

# Display label for each predicted download class.
NUM_DOWNLOADS_MAP = {0: "Faible", 1: "Moyen", 2: "Élevé"}
def safe_float(v):
    """Convert ``v`` to ``float``, falling back to ``0.0``.

    Args:
        v: Any value; typically a number, a numeric string or ``None``.

    Returns:
        ``float(v)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(v)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / list / dict; ValueError: non-numeric string;
        # OverflowError: integer too large for a float.
        return 0.0
def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
    """Predict one class from a FreeSound feature mapping.

    A single-row numeric matrix is built from ``features_dict`` in the order
    given by ``feat_list`` and scored with the model's underlying booster.

    Args:
        model: Fitted estimator exposing ``get_booster()``.
        features_dict: Mapping from feature name to raw value. Missing keys
            and values that cannot be converted to ``float`` (``None``,
            lists, dicts, non-numeric strings) become ``0.0``.
        feat_list: Feature names, in the column order the model expects.
        label_encoder: Optional encoder; when given, the predicted class index
            is mapped back through ``inverse_transform``.

    Returns:
        The decoded label when ``label_encoder`` is given, otherwise the
        predicted class as an ``int``.
    """
    feature_names = list(feat_list)
    # safe_float already maps None / list / dict / unparsable values to 0.0,
    # so no separate type check is needed here.
    row = [safe_float(features_dict.get(name, 0)) for name in feature_names]

    dmatrix = xgb.DMatrix(
        np.asarray([row], dtype=float),
        feature_names=feature_names,
    )
    raw = np.asarray(model.get_booster().predict(dmatrix))

    if raw.ndim > 1:
        # Assumes a 2-D result holds one score per class (e.g. a
        # "multi:softprob" objective) — TODO confirm against the trained
        # models. The best-scoring class is selected.
        pred_int = int(np.argmax(raw[0]))
    else:
        pred_int = int(raw[0])

    if label_encoder is not None:
        return label_encoder.inverse_transform([pred_int])[0]
    return pred_int
def _extract_freesound_id(url):
    """Return the numeric ID that ends the path of ``url``, or ``None``."""
    import re

    # Accepts surrounding whitespace, trailing slashes and an optional
    # query string / fragment after the ID; a bare numeric ID also matches.
    match = re.search(r"(?:^|/)(\d+)/*(?:[?#].*)?$", url.strip())
    return int(match.group(1)) if match else None


def predict_from_freesound_url(url: str):
    """Predict popularity levels for a sound identified by its FreeSound URL.

    Steps: extract the sound ID from the URL, request the sound's duration and
    model features through the FreeSound client, choose the sound-effect or
    music models from the duration, then render the result card.

    Args:
        url: FreeSound URL whose path ends with the numeric sound ID, e.g.
            ``https://freesound.org/s/123456/``.

    Returns:
        An HTML string: a result card on success, otherwise an error card
        (empty or invalid URL, API failure, unknown ID, unsupported duration,
        or prediction failure).
    """
    from html import escape
    from itertools import chain

    if not url or not url.strip():
        return html_error("URL vide", "Collez une URL FreeSound du type <code>https://freesound.org/s/123456/</code>")

    sound_id = _extract_freesound_id(url)
    if sound_id is None:
        return html_error("URL invalide", "Impossible d'extraire l'ID depuis l'URL.")

    # dict.fromkeys removes duplicates while keeping a stable order, so the
    # requested field list is the same on every call.
    field_names = dict.fromkeys(chain(
        ["duration"],
        xgb_music_feat_num,
        xgb_music_feat_avg,
        xgb_effect_feat_num,
        xgb_effect_feat_avg,
    ))
    fields = ",".join(field_names)

    try:
        results = fs_client.search(query="", filter=f"id:{sound_id}", fields=fields)
    except Exception as e:
        # UI boundary: report any client/network failure as an error card.
        return html_error("Erreur API FreeSound", f"Détail : <code>{escape(str(e))}</code>")

    if not results.results:
        return html_error("Son introuvable", "Aucun résultat pour cet ID.")

    # presumably a dict of the requested fields — verify against the client
    sound = results.results[0]
    duration = safe_float(sound.get("duration", 0))

    # Choose the model set from the duration range.
    if MIN_EFFECT <= duration <= MAX_EFFECT:
        badge = "🔊 Effet sonore (FreeSound URL)"
        num_model, num_feats = xgb_effect_num, xgb_effect_feat_num
        avg_model, avg_feats, avg_encoder = xgb_effect_avg, xgb_effect_feat_avg, le_effect_avg
    elif MIN_MUSIC <= duration <= MAX_MUSIC:
        badge = "🎵 Musique (FreeSound URL)"
        num_model, num_feats = xgb_music_num, xgb_music_feat_num
        avg_model, avg_feats, avg_encoder = xgb_music_avg, xgb_music_feat_avg, le_music_avg
    else:
        return html_error(
            "Durée non supportée",
            f"Durée détectée : <b>{duration:.2f} s</b><br><br>"
            f"Plages acceptées :<br>"
            f"• Effet sonore : <b>{MIN_EFFECT}–{MAX_EFFECT} s</b><br>"
            f"• Musique : <b>{MIN_MUSIC}–{MAX_MUSIC} s</b>"
        )

    # Same guard as the upload path: a model failure becomes an error card
    # rather than an unhandled exception in the UI callback.
    try:
        num = predict_with_model_fs(num_model, sound, num_feats)
        avg = predict_with_model_fs(avg_model, sound, avg_feats, avg_encoder)
    except Exception as e:
        return html_error("Prédiction échouée", f"Détail : <code>{escape(str(e))}</code>")

    downloads_text = NUM_DOWNLOADS_MAP.get(num, str(num))
    rating_text = str(avg)
    extra = f'<div class="hint">ID FreeSound : <b>{sound_id}</b></div>'
    return html_result(badge, duration, rating_text, downloads_text, extra_html=extra)
+# =========================
+# APP UI (2 onglets)
+# =========================
theme = gr.themes.Soft()

# Two-tab interface: tab 1 predicts from an uploaded audio file, tab 2 from a
# FreeSound URL. Each tab wires its button to the matching predict function
# and renders the returned HTML card.
with gr.Blocks(title="Démo — Popularité Audio") as demo:
    gr.HTML(
        f"""
<div id="header-title">🎧 Démo — Prédiction de popularité audio</div>
<p id="header-sub">
Deux modes : <b>Upload audio</b> (openSMILE) ou <b>URL FreeSound</b> (features API).<br><br>
<b>Durées acceptées :</b> 🔊 Effet sonore {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 Musique {MIN_MUSIC}–{MAX_MUSIC}s
</p>
"""
    )
    with gr.Tabs():
        with gr.Tab("1) Upload audio (openSMILE)"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Importer un fichier")
                    audio_in = gr.Audio(type="filepath", label="Fichier audio")
                    btn_audio = gr.Button("🚀 Prédire (upload)", variant="primary")
                with gr.Column(scale=1):
                    gr.Markdown("### Résultat")
                    out_audio = gr.HTML()
            btn_audio.click(predict_from_uploaded_audio, inputs=audio_in, outputs=out_audio)
        with gr.Tab("2) URL FreeSound (features API)"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Coller une URL FreeSound")
                    url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
                    btn_url = gr.Button("🚀 Prédire (URL FreeSound)", variant="primary")
                with gr.Column(scale=1):
                    gr.Markdown("### Résultat")
                    out_url = gr.HTML()
            btn_url.click(predict_from_freesound_url, inputs=url_in, outputs=out_url)

# The stylesheet is passed to launch() together with the theme, as the
# previous version of this file did; in Gradio 6 these app-level options
# belong to launch() rather than to the Blocks constructor.
demo.launch(theme=theme, css=CSS)