IKRAMELHADI committed on
Commit
592252e
·
1 Parent(s): bb09077
Files changed (1) hide show
  1. app.py +264 -404
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import tempfile
3
  import numpy as np
4
  import pandas as pd
@@ -8,38 +9,69 @@ import joblib
8
  import soundfile as sf
9
  from pydub import AudioSegment
10
  import opensmile
11
-
12
  import freesound
13
  import xgboost as xgb
14
 
15
- from sklearn.feature_extraction.text import HashingVectorizer
16
-
17
 
18
- # ============================================================
19
  # CONFIG
20
- # ============================================================
21
  MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
22
  MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
23
  SR_TARGET = 16000
24
 
25
- # HF Space Secret: FREESOUND_TOKEN
26
  FREESOUND_TOKEN = os.getenv("FREESOUND_TOKEN", "").strip()
27
-
28
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
29
 
 
 
 
 
30
  def p(*parts):
31
  return os.path.join(BASE_DIR, *parts)
32
 
33
def load_local(path_rel: str):
    """Load a joblib artifact stored at ``path_rel`` relative to ``BASE_DIR``.

    Raises:
        FileNotFoundError: if no file exists at the resolved location.
    """
    artifact_path = p(path_rel)
    if os.path.exists(artifact_path):
        return joblib.load(artifact_path)
    raise FileNotFoundError(f"Fichier introuvable: {path_rel}")
38
-
39
-
40
- # ============================================================
41
- # UI
42
- # ============================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  CSS = """
44
  .card { border: 1px solid #e5e7eb; background: #ffffff; padding: 16px; border-radius: 16px; }
45
  .card-error{ border-color: #fca5a5; background: #fff1f2; }
@@ -53,9 +85,9 @@ CSS = """
53
  .box-title{ font-weight:900; margin-bottom:4px; }
54
  .box-value{ font-size:18px; font-weight:800; }
55
  .hint{ margin-top:10px; color:#6b7280; font-size:12px; }
 
56
  #header-title { font-size: 28px; font-weight: 950; margin-bottom: 6px; }
57
  #header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
58
- pre{ white-space:pre-wrap; }
59
  """
60
 
61
  def html_error(title, body_html):
@@ -137,32 +169,38 @@ def parse_sound_id(url: str):
137
  return int(url.rstrip("/").split("/")[-1])
138
 
139
 
140
- # ============================================================
141
- # FREESOUND CLIENT
142
- # ============================================================
143
  def get_fs_client():
144
  if not FREESOUND_TOKEN:
145
- raise RuntimeError("Token FreeSound manquant. Ajoute le secret FREESOUND_TOKEN dans le Space.")
146
  c = freesound.FreesoundClient()
147
  c.set_token(FREESOUND_TOKEN, "token")
148
  return c
149
 
150
 
151
  # ============================================================
152
- # PARTIE A — Upload audio → openSMILE → modèles
153
- # (depuis app (2).py)
154
  # ============================================================
155
- MODEL_EFFECT_A = load_local("xgb_model_EffectSound.pkl")
156
- MODEL_MUSIC_A = load_local("xgb_model_Music.pkl")
157
-
158
- RATING_DISPLAY_AUDIO = {0: "❌ Informations manquantes", 1: "⭐ Faible", 2: "⭐⭐ Moyen", 3: "⭐⭐⭐ Élevé"}
159
- DOWNLOADS_DISPLAY_AUDIO = {0: "⭐ Faible", 1: "⭐⭐ Moyen", 2: "⭐⭐⭐ Élevé"}
160
-
161
  SMILE = opensmile.Smile(
162
  feature_set=opensmile.FeatureSet.eGeMAPSv02,
163
  feature_level=opensmile.FeatureLevel.Functionals,
164
  )
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def get_duration_seconds(filepath):
167
  ext = os.path.splitext(filepath)[1].lower()
168
  if ext == ".mp3":
@@ -194,68 +232,74 @@ def extract_opensmile_features(filepath):
194
  return feats
195
 
196
  def predict_upload_with_dmatrix(model, X_df: pd.DataFrame):
197
- if hasattr(model, "estimators_"):
198
- preds = []
199
- for est in model.estimators_:
200
- booster = est.get_booster() if hasattr(est, "get_booster") else est
201
- dm = xgb.DMatrix(X_df.values, feature_names=list(X_df.columns))
202
- p = booster.predict(dm)
203
- preds.append(np.asarray(p).reshape(-1))
204
- return np.column_stack(preds)
205
-
206
  booster = model.get_booster() if hasattr(model, "get_booster") else model
207
  dm = xgb.DMatrix(X_df.values, feature_names=list(X_df.columns))
208
  p = booster.predict(dm)
209
- return np.asarray(p).reshape(1, -1)
 
 
 
 
 
 
210
 
211
  def predict_opensmile_upload(audio_file):
 
 
 
 
 
 
 
212
  if audio_file is None:
213
  return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")
214
 
215
  try:
216
  duration = get_duration_seconds(audio_file)
217
  except Exception as e:
218
- return html_error("Audio illisible", f"Impossible de lire l'audio.<br>Détail : <code>{e}</code>")
219
 
220
  if duration < MIN_EFFECT:
221
- return html_error("Audio trop court",
222
- f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
223
  if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
224
- return html_error("Audio hors plage",
225
- f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
226
 
227
  if duration <= MAX_EFFECT:
228
- badge = "🔊 OpenSMILE (upload) — EffectSound"
229
- model = MODEL_EFFECT_A
230
  else:
231
- badge = "🎵 OpenSMILE (upload) — Music"
232
- model = MODEL_MUSIC_A
233
 
234
  try:
235
  X = extract_opensmile_features(audio_file)
236
  except Exception as e:
237
  return html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>")
238
 
239
- # Align features
240
  try:
241
- expected = model.estimators_[0].feature_names_in_ if hasattr(model, "estimators_") else model.feature_names_in_
242
- X = X.reindex(columns=list(expected), fill_value=0)
243
- except Exception as e:
244
- return html_error("Alignement features échoué", f"Détail : <code>{e}</code>")
 
 
245
 
246
  try:
247
  y = predict_upload_with_dmatrix(model, X)
248
  except Exception as e:
249
  return html_error("Prédiction échouée", f"Détail : <code>{e}</code>")
250
 
251
- y = np.array(y)
252
- avg_class = int(y[0, 0])
253
- dl_class = int(y[0, 1])
 
 
 
254
 
255
  rating_text = RATING_DISPLAY_AUDIO.get(avg_class, "Inconnu")
256
  downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, "Inconnu")
257
-
258
  extra = f"""
 
259
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
260
  {interpret_results(avg_class, dl_class)}
261
  </div>
@@ -264,21 +308,94 @@ def predict_opensmile_upload(audio_file):
264
 
265
 
266
  # ============================================================
267
- # PARTIE B — FreeSound URL → “Acoustic features API fields” → modèles
268
- # (depuis app (2).py / PARTIE B)
269
  # ============================================================
270
- xgb_music_num_B = load_local("xgb_num_downloads_music_model.pkl")
271
- xgb_music_feat_num_B = load_local("xgb_num_downloads_music_features.pkl")
272
- xgb_music_avg_B = load_local("xgb_avg_rating_music_model.pkl")
273
- xgb_music_feat_avg_B = load_local("xgb_avg_rating_music_features.pkl")
274
- le_music_avg_B = load_local("xgb_avg_rating_music_label_encoder.pkl")
275
-
276
- xgb_effect_num_B = load_local("xgb_num_downloads_effectsound_model.pkl")
277
- xgb_effect_feat_num_B = load_local("xgb_num_downloads_effectsound_features.pkl")
278
- xgb_effect_avg_B = load_local("xgb_avg_rating_effectsound_model.pkl")
279
- xgb_effect_feat_avg_B = load_local("xgb_avg_rating_effectsound_features.pkl")
280
- le_effect_avg_B = load_local("xgb_avg_rating_effectsound_label_encoder.pkl")
281
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  NUM_DOWNLOADS_MAP_B = {0: "Faible", 1: "Moyen", 2: "Élevé"}
283
 
284
  def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
@@ -290,7 +407,7 @@ def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
290
  row.append(safe_float(val))
291
 
292
  X = pd.DataFrame([row], columns=feat_list)
293
- dmatrix = xgb.DMatrix(X.values, feature_names=feat_list)
294
 
295
  booster = model.get_booster() if hasattr(model, "get_booster") else model
296
  pred_int = int(booster.predict(dmatrix)[0])
@@ -300,6 +417,15 @@ def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
300
  return pred_int
301
 
302
  def predict_freesound_acoustic_features(url: str):
 
 
 
 
 
 
 
 
 
303
  if not url or not url.strip():
304
  return html_error("URL vide", "Colle une URL du type <code>https://freesound.org/s/123456/</code>")
305
 
@@ -313,8 +439,10 @@ def predict_freesound_acoustic_features(url: str):
313
  except Exception as e:
314
  return html_error("Token FreeSound", str(e))
315
 
 
316
  all_features = list(set(
317
- xgb_music_feat_num_B + xgb_music_feat_avg_B + xgb_effect_feat_num_B + xgb_effect_feat_avg_B
 
318
  ))
319
  fields = "duration," + ",".join(all_features)
320
 
@@ -330,14 +458,14 @@ def predict_freesound_acoustic_features(url: str):
330
  duration = safe_float(sound.get("duration", 0))
331
 
332
  if MIN_EFFECT <= duration <= MAX_EFFECT:
333
- badge = "🔊 FreeSound (API features acoustiques) — EffectSound"
334
- dl_class = int(predict_with_model_fs(xgb_effect_num_B, sound, xgb_effect_feat_num_B))
335
- avg_text = str(predict_with_model_fs(xgb_effect_avg_B, sound, xgb_effect_feat_avg_B, le_effect_avg_B))
336
  dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
337
-
338
  avg_class = avg_label_to_class(avg_text)
 
339
  extra = f"""
340
- <div class="hint">ID FreeSound : <b>{sound_id}</b></div>
341
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
342
  {interpret_results(avg_class, dl_class)}
343
  </div>
@@ -345,358 +473,91 @@ def predict_freesound_acoustic_features(url: str):
345
  return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
346
 
347
  if MIN_MUSIC <= duration <= MAX_MUSIC:
348
- badge = "🎵 FreeSound (API features acoustiques) — Music"
349
- dl_class = int(predict_with_model_fs(xgb_music_num_B, sound, xgb_music_feat_num_B))
350
- avg_text = str(predict_with_model_fs(xgb_music_avg_B, sound, xgb_music_feat_avg_B, le_music_avg_B))
351
  dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
352
-
353
  avg_class = avg_label_to_class(avg_text)
 
354
  extra = f"""
355
- <div class="hint">ID FreeSound : <b>{sound_id}</b></div>
356
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
357
  {interpret_results(avg_class, dl_class)}
358
  </div>
359
  """
360
  return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
361
 
362
- return html_error("Durée non supportée",
363
- f"Durée : <b>{duration:.2f}s</b><br>Accepté: 0.5–3s ou 10–60s")
364
 
365
 
366
  # ============================================================
367
- # PARTIE C — FreeSound URL Metadata preprocessing lourd → modèles
368
- # (depuis app (3).py)
369
  # ============================================================
370
-
371
- # ---- objets de preprocessing (local) ----
372
- # Music
373
- scaler_samplerate_music = load_local("music/scaler_music_samplerate.joblib")
374
- scaler_age_days_music = load_local("music/scaler_music_age_days_log.joblib")
375
- username_freq_music = load_local("music/username_freq_dict_music.joblib")
376
- est_num_downloads_music = load_local("music/est_num_downloads_music.joblib")
377
- avg_rating_transformer_music = load_local("music/avg_rating_transformer_music.joblib")
378
- music_subcategory_cols = load_local("music/music_subcategory_cols.joblib")
379
- music_onehot_cols = load_local("music/music_onehot_cols.joblib")
380
- music_onehot_tags = load_local("music/music_onehot_tags.joblib")
381
-
382
- # EffectSound
383
- scaler_samplerate_effect = load_local("effectSound/scaler_effectSamplerate.joblib")
384
- scaler_age_days_effect = load_local("effectSound/scaler_effectSound_age_days_log.joblib")
385
- username_freq_effect = load_local("effectSound/username_freq_dict_effectSound.joblib")
386
- est_num_downloads_effect = load_local("effectSound/est_num_downloads_effectSound.joblib")
387
- avg_rating_transformer_effect = load_local("effectSound/avg_rating_transformer_effectSound.joblib")
388
- effect_subcategory_cols = load_local("effectSound/effectSound_subcategory_cols.joblib")
389
- effect_onehot_cols = load_local("effectSound/effectSound_onehot_cols.joblib")
390
- effect_onehot_tags = load_local("effectSound/effect_onehot_tags.joblib")
391
-
392
- # ---- modèles metadata (local) ----
393
- music_model_num_downloads_C = load_local("music_model_num_downloads.joblib")
394
- music_model_avg_rating_C = load_local("music_xgb_avg_rating.joblib")
395
- music_avg_rating_le_C = load_local("music_xgb_avg_rating_label_encoder.joblib")
396
- music_model_features_C = load_local("music_model_features_list.joblib")
397
-
398
- effect_model_num_downloads_C = load_local("effectSound_model_num_downloads.joblib")
399
- effect_model_avg_rating_C = load_local("effectSound_xgb_avg_rating.joblib")
400
- effect_avg_rating_le_C = load_local("effectSound_xgb_avg_rating_label_encoder.joblib")
401
- effect_model_features_C = load_local("effect_model_features_list.joblib")
402
-
403
- # Dedup des listes (comme ton script)
404
- music_model_features_C = list(dict.fromkeys(music_model_features_C))
405
- effect_model_features_C = list(dict.fromkeys(effect_model_features_C))
406
-
407
- # ---- GloVe local (optionnel) ----
408
- # Mets un fichier local et indique son chemin via GLOVE_PATH si tu veux.
409
- # Exemple: GLOVE_PATH="models/glove.kv"
410
- GLOVE_PATH = os.getenv("GLOVE_PATH", "").strip()
411
- glove_model = None
412
-
413
def try_load_glove():
    """Populate the global ``glove_model`` from ``GLOVE_PATH`` when possible.

    ``glove_model`` ends up as ``None`` when ``GLOVE_PATH`` is empty, when the
    file does not exist, or when importing gensim / loading the vectors fails.
    """
    global glove_model
    glove_model = None

    if not GLOVE_PATH:
        return

    vectors_path = p(GLOVE_PATH)
    if not os.path.exists(vectors_path):
        return

    try:
        import gensim
        from gensim.models import KeyedVectors
        glove_model = KeyedVectors.load(vectors_path, mmap="r")
    except Exception:
        # Best effort: any failure leaves the embeddings disabled.
        glove_model = None
428
-
429
- try_load_glove()
430
-
431
def description_to_vec(text, model, dim=100):
    """Average the embedding vectors of the known words in ``text``.

    Args:
        text: description string; lower-cased and split on whitespace.
        model: mapping-like object supporting ``word in model`` and
            ``model[word]``, or ``None``.
        dim: length of the zero vector returned when nothing can be averaged.

    Returns:
        The element-wise mean of the matched vectors, or a zero vector of
        length ``dim`` when ``model`` is ``None``, ``text`` is empty, or no
        word of ``text`` is in ``model``.
    """
    if model is not None and text:
        matched = [model[token] for token in text.lower().split() if token in model]
        if matched:
            return np.mean(matched, axis=0)
    return np.zeros(dim, dtype=float)
439
-
440
def preprocess_name(df, vec_dim=8):
    """Add name-length and hashed-name features to a copy of ``df``.

    Reads the ``name_clean`` column and returns a new frame with an extra
    ``name_len`` column plus ``name_vec_0`` .. ``name_vec_{vec_dim-1}`` columns
    produced by a ``HashingVectorizer``.
    """
    out = df.copy()
    out["name_len"] = out["name_clean"].str.len()

    hasher = HashingVectorizer(n_features=vec_dim, alternate_sign=False, norm=None)
    hashed = hasher.transform(out["name_clean"]).toarray()
    hashed_cols = [f"name_vec_{i}" for i in range(vec_dim)]
    hashed_df = pd.DataFrame(hashed, columns=hashed_cols, index=out.index)

    return pd.concat([out, hashed_df], axis=1)
452
-
453
def fetch_sound_metadata(fs_client, sound_url):
    """Fetch one FreeSound sound and return its metadata as a one-row frame.

    The sound ID is parsed from ``sound_url`` and the sound is retrieved with
    ``fs_client.get_sound``. ``name`` is read directly from the sound object;
    every other field falls back to a default when the attribute is missing.
    """
    sound_id = parse_sound_id(sound_url)
    sound = fs_client.get_sound(sound_id)

    def field(attr, fallback):
        return getattr(sound, attr, fallback)

    record = {
        "id": sound_id,
        "name": sound.name,
        "num_ratings": field("num_ratings", 0),
        "tags": ",".join(sound.tags) if field("tags", None) else "",
        "username": field("username", ""),
        # A present-but-falsy description is normalised to "".
        "description": field("description", "") or "",
        "created": field("created", ""),
        "license": field("license", ""),
        "num_downloads": field("num_downloads", 0),
        "channels": field("channels", 0),
        "filesize": field("filesize", 0),
        "num_comments": field("num_comments", 0),
        "category_is_user_provided": field("category_is_user_provided", 0),
        "duration": field("duration", 0),
        "avg_rating": field("avg_rating", 0),
        "category": field("category", "Unknown"),
        "subcategory": field("subcategory", "Other"),
        "type": field("type", ""),
        "samplerate": field("samplerate", 0),
    }
    return pd.DataFrame([record])
478
-
479
def preprocess_sound_metadata(df):
    """Convert a one-row raw metadata frame into model-ready features.

    The duration of the first row selects the effect-sound or music set of
    fitted preprocessing objects.

    Returns:
        ``(features_df, dataset_type)`` with ``dataset_type`` equal to
        ``"effectSound"`` or ``"music"``, or ``(None, message)`` when the
        duration falls outside both accepted ranges.
    """
    frame = df.copy()
    dur = float(frame["duration"].iloc[0])

    # Pick the preprocessing objects matching the duration bucket.
    if MIN_EFFECT <= dur <= MAX_EFFECT:
        dataset_type = "effectSound"
        (scaler_samplerate, scaler_age, username_freq, est_num_downloads,
         avg_rating_transformer, subcat_cols, onehot_cols, onehot_tags) = (
            scaler_samplerate_effect, scaler_age_days_effect, username_freq_effect,
            est_num_downloads_effect, avg_rating_transformer_effect,
            effect_subcategory_cols, effect_onehot_cols, effect_onehot_tags,
        )
    elif MIN_MUSIC <= dur <= MAX_MUSIC:
        dataset_type = "music"
        (scaler_samplerate, scaler_age, username_freq, est_num_downloads,
         avg_rating_transformer, subcat_cols, onehot_cols, onehot_tags) = (
            scaler_samplerate_music, scaler_age_days_music, username_freq_music,
            est_num_downloads_music, avg_rating_transformer_music,
            music_subcategory_cols, music_onehot_cols, music_onehot_tags,
        )
    else:
        return None, f"Durée hors plage ({dur:.2f}s)."

    # Boolean flag as integer.
    frame["category_is_user_provided"] = frame["category_is_user_provided"].astype(int)

    # Uploader frequency; usernames absent from the mapping get 0.
    frame["username_freq"] = frame["username"].map(username_freq).fillna(0)

    # log1p on the count-like numeric columns.
    for numeric_col in ("num_ratings", "num_comments", "filesize", "duration"):
        frame[numeric_col] = np.log1p(frame[numeric_col])

    # Scaled sample rate.
    frame["samplerate"] = scaler_samplerate.transform(frame[["samplerate"]])

    # Age in days since creation -> log1p -> scaled; intermediates are dropped.
    created = pd.to_datetime(frame["created"], errors="coerce").dt.tz_localize(None)
    frame["created"] = created
    frame["age_days"] = (pd.Timestamp.now() - frame["created"]).dt.days
    frame["age_days_log"] = np.log1p(frame["age_days"])
    frame["age_days_log_scaled"] = scaler_age.transform(frame[["age_days_log"]])
    frame = frame.drop(columns=["created", "age_days", "age_days_log"], errors="ignore")

    # Download-count class and transformed average rating.
    frame["num_downloads_class"] = est_num_downloads.transform(frame[["num_downloads"]])
    frame["avg_rating"] = avg_rating_transformer.transform(frame["avg_rating"].to_numpy())

    # Subcategory one-hot: zero every column, then flag the matching one.
    for subcat_col in subcat_cols:
        frame[subcat_col] = 0
    subcat_val = frame["subcategory"].iloc[0]
    for subcat_col in subcat_cols:
        if subcat_val == subcat_col.replace("subcategory_", ""):
            frame[subcat_col] = 1
    frame = frame.drop(columns=["subcategory"], errors="ignore")

    # Fixed one-hot columns (license / category / type): create missing ones as 0.
    for fixed_col in onehot_cols:
        if fixed_col not in frame.columns:
            frame[fixed_col] = 0

    license_val = frame.loc[0, "license"]
    category_val = frame.loc[0, "category"]
    type_val = frame.loc[0, "type"]
    active_flags = (
        f"license_{license_val}",
        f"category_{category_val}",
        f"type_{type_val}",
    )
    for flag_col in active_flags:
        if flag_col in frame.columns:
            frame[flag_col] = 1

    # Make sure the text columns and every tag column exist.
    for text_col in ("name", "tags", "description"):
        if text_col not in frame.columns:
            frame[text_col] = ""
    for tag_col in onehot_tags:
        if tag_col not in frame.columns:
            frame[tag_col] = 0

    # Tags one-hot from the comma-separated tag string.
    raw_tags = frame["tags"].iloc[0]
    tags_list = raw_tags.lower().split(",") if raw_tags else []
    for tag_col in onehot_tags:
        if tag_col.replace("tag_", "").lower() in tags_list:
            frame[tag_col] = 1
    frame = frame.drop(columns=["tags"], errors="ignore")

    # Hashed name features; the text after the last "." in the name is dropped.
    frame["name_clean"] = frame["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
    frame = preprocess_name(frame, vec_dim=8)
    frame = frame.drop(columns=["name", "name_clean"], errors="ignore")

    # Description embedding (zeros when no GloVe model is loaded).
    desc_vec = description_to_vec(frame["description"].iloc[0], glove_model, dim=100)
    for i in range(100):
        frame[f"description_glove_{i}"] = float(desc_vec[i])
    frame = frame.drop(columns=["description"], errors="ignore")

    # Drop the raw columns that are not model inputs.
    unused_raw = ["license", "category", "type", "subcategory", "id",
                  "num_downloads", "file_path", "username"]
    frame = frame.drop(columns=unused_raw, errors="ignore")

    return frame, dataset_type
586
-
587
def predict_with_model_df(model, df_input, model_features, le=None):
    """Predict a class for the first row of ``df_input`` with ``model``'s booster.

    Columns are aligned to the booster's own feature names (missing ones are
    filled with 0.0). ``model_features`` is accepted but not used here.

    Returns:
        The integer class (argmax for a multi-column output, otherwise the
        rounded value), or, when ``le`` is given, its decoded label. If
        decoding fails, a "Classe inconnue (...)" string is returned.
    """
    booster = model.get_booster()
    booster_feats = booster.feature_names

    aligned = df_input.reindex(columns=booster_feats, fill_value=0.0).astype(float)
    preds = booster.predict(xgb.DMatrix(aligned.values, feature_names=booster_feats))

    first = preds[0]
    multi_column = len(preds.shape) > 1 and preds.shape[1] > 1
    pred_int = int(np.argmax(first)) if multi_column else int(round(float(first)))

    if le is None:
        return pred_int
    try:
        return le.inverse_transform([pred_int])[0]
    except Exception:
        return f"Classe inconnue ({pred_int})"
603
-
604
def predict_freesound_metadata(url: str, show_debug: bool):
    """Predict popularity of a FreeSound sound from its metadata.

    Args:
        url: FreeSound sound URL, e.g. ``https://freesound.org/s/123456/``.
        show_debug: when true, append the raw metadata and a preview of the
            preprocessed features to the result.

    Returns:
        An HTML fragment: the result card on success, an error card otherwise.
        Every stage (URL parsing, client creation, API call, preprocessing,
        prediction) reports failures as an error card instead of raising.
    """
    # Local import so this block does not depend on the file's import section.
    from html import escape

    if not url or not url.strip():
        return html_error("URL vide", "Colle une URL du type <code>https://freesound.org/s/123456/</code>")
    # Pasted URLs often carry trailing whitespace, which would break ID parsing.
    url = url.strip()

    try:
        sound_id = parse_sound_id(url)
    except Exception:
        return html_error("URL invalide", "Impossible d'extraire l'ID depuis l'URL.")

    try:
        fs_client = get_fs_client()
    except Exception as e:
        return html_error("Token FreeSound", str(e))

    try:
        df_raw = fetch_sound_metadata(fs_client, url)
    except Exception as e:
        return html_error("Erreur API FreeSound", f"Détail : <code>{escape(str(e))}</code>")

    dur = float(df_raw["duration"].iloc[0])
    if dur < MIN_EFFECT:
        return html_error("Son trop court", f"Durée {dur:.2f}s. Accepté: 0.5–3s ou 10–60s")
    if (MAX_EFFECT < dur < MIN_MUSIC) or dur > MAX_MUSIC:
        return html_error("Son hors plage", f"Durée {dur:.2f}s. Accepté: 0.5–3s ou 10–60s")

    # Preprocessing is guarded like the other stages.
    try:
        df_processed, dataset_type = preprocess_sound_metadata(df_raw)
    except Exception as e:
        return html_error("Preprocessing metadata", f"Détail : <code>{escape(str(e))}</code>")
    if df_processed is None:
        return html_error("Preprocessing metadata", "Impossible de prétraiter (durée hors plage).")

    # Pick the models / feature list for the detected dataset type.
    if dataset_type == "effectSound":
        badge = "🔊 FreeSound (metadata) — EffectSound"
        model_nd = effect_model_num_downloads_C
        model_ar = effect_model_avg_rating_C
        model_features = effect_model_features_C
        le = effect_avg_rating_le_C
    else:
        badge = "🎵 FreeSound (metadata) — Music"
        model_nd = music_model_num_downloads_C
        model_ar = music_model_avg_rating_C
        model_features = music_model_features_C
        le = music_avg_rating_le_C

    try:
        # avg_rating and num_downloads_class are removed before the models run.
        cols_to_remove = ["avg_rating", "num_downloads_class"]
        df_for_model = df_processed.drop(
            columns=[c for c in cols_to_remove if c in df_processed.columns],
            errors="ignore",
        )
        # Force exactly the model's columns, in the model's order.
        df_for_model = df_for_model.reindex(columns=model_features, fill_value=0.0).astype(float)

        pred_num_downloads_val = predict_with_model_df(model_nd, df_for_model, model_features, le=None)
        pred_avg_rating = predict_with_model_df(model_ar, df_for_model, model_features, le=le)
    except Exception as e:
        return html_error("Prédiction échouée", f"Détail : <code>{escape(str(e))}</code>")

    num_map = {0: "Low", 1: "Medium", 2: "High"}
    pred_num_downloads = num_map.get(pred_num_downloads_val, str(pred_num_downloads_val))

    avg_class = avg_label_to_class(pred_avg_rating)
    dl_class_for_interp = {"Low": 0, "Medium": 1, "High": 2}.get(pred_num_downloads, 1)

    debug_html = ""
    if show_debug:
        # The metadata comes from a remote service (names, descriptions, tags
        # written by FreeSound users), so it is escaped before being embedded.
        raw_txt = escape("\n".join(f"{c}: {df_raw.loc[0, c]}" for c in df_raw.columns))
        # Preview limited to the first 120 processed columns.
        proc_txt = escape("\n".join(
            f"{c}: {df_processed.loc[0, c]}" for c in df_processed.columns[:120]
        ))
        glove_note = "OK" if glove_model is not None else "ABSENT (vecteurs à 0)"
        debug_html = f"""
        <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
        <div class="hint"><b>Debug</b> — GloVe: <b>{glove_note}</b></div>
        <details><summary>Voir métadonnées brutes</summary><pre>{raw_txt}</pre></details>
        <details><summary>Voir features après preprocessing (aperçu)</summary><pre>{proc_txt}</pre></details>
        </div>
        """

    extra = f"""
    <div class="hint">ID FreeSound : <b>{sound_id}</b></div>
    <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
    {interpret_results(avg_class, dl_class_for_interp)}
    </div>
    {debug_html}
    """
    return html_result(badge, dur, str(pred_avg_rating), str(pred_num_downloads), extra_html=extra)
683
 
684
 
685
  # ============================================================
686
- # GRADIO APP (3 onglets)
687
  # ============================================================
688
- with gr.Blocks(title="Popularité FreeSound — 3 pipelines", css=CSS, theme=gr.themes.Soft()) as demo:
689
  gr.HTML(f"""
690
- <div id="header-title">Popularité FreeSound — 3 pipelines</div>
691
  <p id="header-sub">
692
- <b>A)</b> Upload audio → <b>OpenSMILE</b><br>
693
- <b>B)</b> URL FreeSound <b>Features acoustiques via API fields</b><br>
694
- <b>C)</b> URL FreeSound <b>Metadata + preprocessing</b><br><br>
695
  <b>Durées acceptées :</b> 🔊 {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 {MIN_MUSIC}–{MAX_MUSIC}s
696
  </p>
697
  """)
698
 
699
  with gr.Tabs():
 
 
 
 
 
700
  with gr.Tab("A) Upload → OpenSMILE"):
701
  with gr.Row():
702
  with gr.Column():
@@ -719,10 +580,9 @@ with gr.Blocks(title="Popularité FreeSound — 3 pipelines", css=CSS, theme=gr.
719
  with gr.Row():
720
  with gr.Column():
721
  url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
722
- show_debug = gr.Checkbox(label="Afficher debug (brut + aperçu features)", value=False)
723
  btn = gr.Button("🚀 Prédire (Metadata)", variant="primary")
724
  with gr.Column():
725
  out = gr.HTML()
726
- btn.click(predict_freesound_metadata, inputs=[url_in, show_debug], outputs=out)
727
 
728
  demo.launch()
 
1
  import os
2
+ import glob
3
  import tempfile
4
  import numpy as np
5
  import pandas as pd
 
9
  import soundfile as sf
10
  from pydub import AudioSegment
11
  import opensmile
 
12
  import freesound
13
  import xgboost as xgb
14
 
 
 
15
 
16
+ # =========================
17
  # CONFIG
18
+ # =========================
19
  MIN_EFFECT, MAX_EFFECT = 0.5, 3.0
20
  MIN_MUSIC, MAX_MUSIC = 10.0, 60.0
21
  SR_TARGET = 16000
22
 
 
23
  FREESOUND_TOKEN = os.getenv("FREESOUND_TOKEN", "").strip()
 
24
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
 
26
+
27
+ # =========================
28
+ # Helpers fichiers
29
+ # =========================
30
  def p(*parts):
31
  return os.path.join(BASE_DIR, *parts)
32
 
33
def list_local_files():
    """List model/data files found under ``BASE_DIR``.

    Returns:
        Sorted paths, relative to ``BASE_DIR``, of every file whose lower-cased
        name ends with ``.pkl``, ``.joblib``, ``.json``, ``.bin`` or ``.txt``.
    """
    wanted_suffixes = (".pkl", ".joblib", ".json", ".bin", ".txt")
    found = [
        os.path.relpath(os.path.join(folder, fname), BASE_DIR)
        for folder, _, fnames in os.walk(BASE_DIR)
        for fname in fnames
        if fname.lower().endswith(wanted_suffixes)
    ]
    found.sort()
    return found
41
+
42
def exists(rel_path: str) -> bool:
    """Return whether ``rel_path``, resolved against ``BASE_DIR``, exists."""
    resolved = p(rel_path)
    return os.path.exists(resolved)
44
+
45
def load_joblib_any(candidates):
    """Load the first usable joblib artifact among ``candidates``.

    Args:
        candidates: iterable of paths relative to ``BASE_DIR``. An entry
            containing ``*``, ``?`` or ``[`` is treated as a glob pattern and
            its matches are tried in sorted order.

    Returns:
        ``(obj, chosen_path)`` for the first file that loads, with
        ``chosen_path`` relative to ``BASE_DIR``; ``(None, None)`` when nothing
        could be loaded.

    NOTE(security): ``joblib.load`` unpickles the file, so only trusted
    artifacts should be placed under ``BASE_DIR``.
    """
    import warnings

    for candidate in candidates:
        if any(ch in candidate for ch in "*?["):
            # Escape BASE_DIR so glob metacharacters in the install path are
            # matched literally; only the candidate part acts as a pattern.
            pattern = os.path.join(glob.escape(BASE_DIR), candidate)
            attempts = [
                (match, os.path.relpath(match, BASE_DIR))
                for match in sorted(glob.glob(pattern))
            ]
        else:
            full = p(candidate)
            attempts = [(full, candidate)] if os.path.exists(full) else []

        # Try every match, not just the first: one unreadable file must not
        # hide a valid one that matches the same pattern.
        for full_path, rel_path in attempts:
            try:
                obj = joblib.load(full_path)
            except Exception as exc:
                # Still best effort, but no longer silent.
                warnings.warn(
                    f"Could not load {rel_path!r}: {exc!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                continue
            return obj, rel_path

    return None, None
70
+
71
+
72
+ # =========================
73
+ # UI helpers
74
+ # =========================
75
  CSS = """
76
  .card { border: 1px solid #e5e7eb; background: #ffffff; padding: 16px; border-radius: 16px; }
77
  .card-error{ border-color: #fca5a5; background: #fff1f2; }
 
85
  .box-title{ font-weight:900; margin-bottom:4px; }
86
  .box-value{ font-size:18px; font-weight:800; }
87
  .hint{ margin-top:10px; color:#6b7280; font-size:12px; }
88
+ pre{ white-space:pre-wrap; }
89
  #header-title { font-size: 28px; font-weight: 950; margin-bottom: 6px; }
90
  #header-sub { color:#6b7280; margin-top:0px; line-height:1.45; }
 
91
  """
92
 
93
  def html_error(title, body_html):
 
169
  return int(url.rstrip("/").split("/")[-1])
170
 
171
 
172
+ # =========================
173
+ # FreeSound client
174
+ # =========================
175
  def get_fs_client():
176
  if not FREESOUND_TOKEN:
177
+ raise RuntimeError("Token FreeSound manquant. Ajoute le secret FREESOUND_TOKEN (Settings Secrets).")
178
  c = freesound.FreesoundClient()
179
  c.set_token(FREESOUND_TOKEN, "token")
180
  return c
181
 
182
 
183
  # ============================================================
184
+ # PARTIE A — OpenSMILE (upload)
 
185
  # ============================================================
 
 
 
 
 
 
186
  SMILE = opensmile.Smile(
187
  feature_set=opensmile.FeatureSet.eGeMAPSv02,
188
  feature_level=opensmile.FeatureLevel.Functionals,
189
  )
190
 
191
+ RATING_DISPLAY_AUDIO = {0: "❌ Informations manquantes", 1: "⭐ Faible", 2: "⭐⭐ Moyen", 3: "⭐⭐⭐ Élevé"}
192
+ DOWNLOADS_DISPLAY_AUDIO = {0: "⭐ Faible", 1: "⭐⭐ Moyen", 2: "⭐⭐⭐ Élevé"}
193
+
194
+ MODEL_EFFECT_A, PATH_EFFECT_A = load_joblib_any([
195
+ "xgb_model_EffectSound.pkl",
196
+ "xgb_model_effectsound.pkl",
197
+ "xgb_model_effectSound.pkl",
198
+ ])
199
+ MODEL_MUSIC_A, PATH_MUSIC_A = load_joblib_any([
200
+ "xgb_model_Music.pkl",
201
+ "xgb_model_music.pkl",
202
+ ])
203
+
204
  def get_duration_seconds(filepath):
205
  ext = os.path.splitext(filepath)[1].lower()
206
  if ext == ".mp3":
 
232
  return feats
233
 
234
  def predict_upload_with_dmatrix(model, X_df: pd.DataFrame):
235
+ # sklearn wrapper or Booster
 
 
 
 
 
 
 
 
236
  booster = model.get_booster() if hasattr(model, "get_booster") else model
237
  dm = xgb.DMatrix(X_df.values, feature_names=list(X_df.columns))
238
  p = booster.predict(dm)
239
+ p = np.asarray(p)
240
+ if p.ndim == 1:
241
+ # si ton modèle renvoie 2 outputs concat, ça ne marche pas;
242
+ # ton modèle A semble renvoyer 2 classes (avg, downloads) -> souvent shape (2,)
243
+ # on force (1, -1)
244
+ p = p.reshape(1, -1)
245
+ return p
246
 
247
  def predict_opensmile_upload(audio_file):
248
+ if MODEL_EFFECT_A is None or MODEL_MUSIC_A is None:
249
+ return html_error(
250
+ "Modèles OpenSMILE manquants",
251
+ "Il faut fournir les deux modèles OpenSMILE (effect & music) à la racine, ex: "
252
+ "<code>xgb_model_EffectSound.pkl</code> et <code>xgb_model_Music.pkl</code>."
253
+ )
254
+
255
  if audio_file is None:
256
  return html_error("Aucun fichier", "Veuillez importer un fichier audio (wav, mp3, flac…).")
257
 
258
  try:
259
  duration = get_duration_seconds(audio_file)
260
  except Exception as e:
261
+ return html_error("Audio illisible", f"Détail : <code>{e}</code>")
262
 
263
  if duration < MIN_EFFECT:
264
+ return html_error("Audio trop court", f"Durée : <b>{duration:.2f}s</b> — attendu 0.5–3s ou 10–60s")
 
265
  if (MAX_EFFECT < duration < MIN_MUSIC) or duration > MAX_MUSIC:
266
+ return html_error("Audio hors plage", f"Durée : <b>{duration:.2f}s</b> — attendu 0.5–3s ou 10–60s")
 
267
 
268
  if duration <= MAX_EFFECT:
269
+ badge, model = "🔊 OpenSMILE (upload) — EffectSound", MODEL_EFFECT_A
 
270
  else:
271
+ badge, model = "🎵 OpenSMILE (upload) — Music", MODEL_MUSIC_A
 
272
 
273
  try:
274
  X = extract_opensmile_features(audio_file)
275
  except Exception as e:
276
  return html_error("Extraction openSMILE échouée", f"Détail : <code>{e}</code>")
277
 
278
+ # align features si possible
279
  try:
280
+ if hasattr(model, "feature_names_in_"):
281
+ expected = list(model.feature_names_in_)
282
+ X = X.reindex(columns=expected, fill_value=0)
283
+ except Exception:
284
+ # pas bloquant
285
+ pass
286
 
287
  try:
288
  y = predict_upload_with_dmatrix(model, X)
289
  except Exception as e:
290
  return html_error("Prédiction échouée", f"Détail : <code>{e}</code>")
291
 
292
+ # Convention attendue : y[0,0]=avg_class, y[0,1]=dl_class
293
+ try:
294
+ avg_class = int(y[0, 0])
295
+ dl_class = int(y[0, 1])
296
+ except Exception:
297
+ return html_error("Sortie modèle inattendue", f"Forme sortie: <code>{getattr(y,'shape',None)}</code>")
298
 
299
  rating_text = RATING_DISPLAY_AUDIO.get(avg_class, "Inconnu")
300
  downloads_text = DOWNLOADS_DISPLAY_AUDIO.get(dl_class, "Inconnu")
 
301
  extra = f"""
302
+ <div class="hint">Modèles chargés: <code>{PATH_EFFECT_A}</code> · <code>{PATH_MUSIC_A}</code></div>
303
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
304
  {interpret_results(avg_class, dl_class)}
305
  </div>
 
308
 
309
 
310
  # ============================================================
311
+ # PARTIE B — FreeSound Acoustic Features (API fields)
312
+ # => c’est ici que tu as l’erreur de fichier manquant
313
  # ============================================================
314
def load_feature_models_B():
    """Load every pipeline-B artifact, tolerating file-name variants.

    Each artifact is requested from ``load_joblib_any`` with a list of
    candidate file names: the exact expected names first, then wildcard
    patterns covering spelling variants.

    Returns:
        tuple: ``(M, problems)``. ``M`` maps each artifact key to the first
        value returned by ``load_joblib_any`` and ``"<key>_path"`` to the
        second one. ``problems`` lists, in load order, the artifact keys
        whose loaded value is ``None``.
    """
    # (artifact key, candidate names / wildcard patterns), in load order:
    # the five MUSIC artifacts first, then the five EFFECTSOUND ones.
    specs = (
        ("music_num_model", [
            "xgb_num_downloads_music_model.pkl",
            "*num*downloads*music*model*.pkl",
            "*num*downloads*music*model*.joblib",
        ]),
        ("music_num_feats", [
            "xgb_num_downloads_music_features.pkl",
            "*num*downloads*music*features*.pkl",
            "*num*downloads*music*features*.joblib",
        ]),
        ("music_avg_model", [
            "xgb_avg_rating_music_model.pkl",
            "*avg*rating*music*model*.pkl",
            "*avg*rating*music*model*.joblib",
        ]),
        ("music_avg_feats", [
            "xgb_avg_rating_music_features.pkl",
            "*avg*rating*music*features*.pkl",
            "*avg*rating*music*features*.joblib",
        ]),
        ("music_avg_le", [
            "xgb_avg_rating_music_label_encoder.pkl",
            "*avg*rating*music*label*encoder*.pkl",
            "*avg*rating*music*label*encoder*.joblib",
        ]),
        # EffectSound artifacts: several spellings of "effectsound" are tried.
        ("eff_num_model", [
            "xgb_num_downloads_effectsound_model.pkl",
            "xgb_num_downloads_effectSound_model.pkl",
            "xgb_num_downloads_effect_sound_model.pkl",
            "*num*downloads*effect*model*.pkl",
            "*num*downloads*effect*model*.joblib",
        ]),
        ("eff_num_feats", [
            "xgb_num_downloads_effectsound_features.pkl",
            "xgb_num_downloads_effectSound_features.pkl",
            "xgb_num_downloads_effect_sound_features.pkl",
            "*num*downloads*effect*features*.pkl",
            "*num*downloads*effect*features*.joblib",
        ]),
        ("eff_avg_model", [
            "xgb_avg_rating_effectsound_model.pkl",
            "xgb_avg_rating_effectSound_model.pkl",
            "xgb_avg_rating_effect_sound_model.pkl",
            "*avg*rating*effect*model*.pkl",
            "*avg*rating*effect*model*.joblib",
        ]),
        # This is the artifact reported as missing, hence the many variants.
        ("eff_avg_feats", [
            "xgb_avg_rating_effectsound_features.pkl",
            "xgb_avg_rating_effectSound_features.pkl",
            "xgb_avg_rating_effect_sound_features.pkl",
            "*avg*rating*effect*features*.pkl",
            "*avg*rating*effect*features*.joblib",
        ]),
        ("eff_avg_le", [
            "xgb_avg_rating_effectsound_label_encoder.pkl",
            "xgb_avg_rating_effectSound_label_encoder.pkl",
            "xgb_avg_rating_effect_sound_label_encoder.pkl",
            "*avg*rating*effect*label*encoder*.pkl",
            "*avg*rating*effect*label*encoder*.joblib",
        ]),
    )

    M = {}
    for key, candidates in specs:
        M[key], M[f"{key}_path"] = load_joblib_any(candidates)

    # Every artifact is required; report the ones that could not be loaded.
    problems = [key for key, _ in specs if M.get(key) is None]

    return M, problems
397
+
398
+ MODELS_B, PROBLEMS_B = load_feature_models_B()
399
  NUM_DOWNLOADS_MAP_B = {0: "Faible", 1: "Moyen", 2: "Élevé"}
400
 
401
  def predict_with_model_fs(model, features_dict, feat_list, label_encoder=None):
 
407
  row.append(safe_float(val))
408
 
409
  X = pd.DataFrame([row], columns=feat_list)
410
+ dmatrix = xgb.DMatrix(X.values, feature_names=list(feat_list))
411
 
412
  booster = model.get_booster() if hasattr(model, "get_booster") else model
413
  pred_int = int(booster.predict(dmatrix)[0])
 
417
  return pred_int
418
 
419
  def predict_freesound_acoustic_features(url: str):
420
+ if PROBLEMS_B:
421
+ missing = ", ".join(f"<code>{k}</code>" for k in PROBLEMS_B)
422
+ files = "<br>".join(list_local_files()[:200])
423
+ return html_error(
424
+ "Modèles Features API incomplets",
425
+ f"Il manque des fichiers nécessaires au pipeline B :<br>{missing}<br><br>"
426
+ f"<b>Fichiers détectés dans ton Space (aperçu)</b>:<br><pre>{files}</pre>"
427
+ )
428
+
429
  if not url or not url.strip():
430
  return html_error("URL vide", "Colle une URL du type <code>https://freesound.org/s/123456/</code>")
431
 
 
439
  except Exception as e:
440
  return html_error("Token FreeSound", str(e))
441
 
442
+ # champs à récupérer
443
  all_features = list(set(
444
+ MODELS_B["music_num_feats"] + MODELS_B["music_avg_feats"] +
445
+ MODELS_B["eff_num_feats"] + MODELS_B["eff_avg_feats"]
446
  ))
447
  fields = "duration," + ",".join(all_features)
448
 
 
458
  duration = safe_float(sound.get("duration", 0))
459
 
460
  if MIN_EFFECT <= duration <= MAX_EFFECT:
461
+ badge = "🔊 FreeSound (Features acoustiques API) — EffectSound"
462
+ dl_class = int(predict_with_model_fs(MODELS_B["eff_num_model"], sound, MODELS_B["eff_num_feats"]))
463
+ avg_text = str(predict_with_model_fs(MODELS_B["eff_avg_model"], sound, MODELS_B["eff_avg_feats"], MODELS_B["eff_avg_le"]))
464
  dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
 
465
  avg_class = avg_label_to_class(avg_text)
466
+
467
  extra = f"""
468
+ <div class="hint">ID: <b>{sound_id}</b></div>
469
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
470
  {interpret_results(avg_class, dl_class)}
471
  </div>
 
473
  return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
474
 
475
  if MIN_MUSIC <= duration <= MAX_MUSIC:
476
+ badge = "🎵 FreeSound (Features acoustiques API) — Music"
477
+ dl_class = int(predict_with_model_fs(MODELS_B["music_num_model"], sound, MODELS_B["music_num_feats"]))
478
+ avg_text = str(predict_with_model_fs(MODELS_B["music_avg_model"], sound, MODELS_B["music_avg_feats"], MODELS_B["music_avg_le"]))
479
  dl_text = NUM_DOWNLOADS_MAP_B.get(dl_class, str(dl_class))
 
480
  avg_class = avg_label_to_class(avg_text)
481
+
482
  extra = f"""
483
+ <div class="hint">ID: <b>{sound_id}</b></div>
484
  <div style="margin-top:12px; padding-top:10px; border-top:1px dashed #d1d5db">
485
  {interpret_results(avg_class, dl_class)}
486
  </div>
487
  """
488
  return html_result(badge, duration, avg_text, dl_text, extra_html=extra)
489
 
490
+ return html_error("Durée non supportée", f"Durée : <b>{duration:.2f}s</b> — attendu 0.5–3s ou 10–60s")
 
491
 
492
 
493
  # ============================================================
494
+ # PARTIE C — Metadata (désactivée si pas de dossiers/fichiers)
 
495
  # ============================================================
496
def predict_freesound_metadata_stub(url: str):
    """Placeholder handler for the disabled metadata pipeline (C).

    Args:
        url: FreeSound URL supplied by the UI callback; it is accepted for
            signature compatibility but never read.

    Returns:
        The ``html_error`` rendering of a fixed "pipeline unavailable" message.
    """
    title = "Pipeline Metadata non disponible"
    # The fragments are joined without separators, so the text is the same
    # as a single literal.
    body_parts = (
        "Tu as dit ne pas avoir les dossiers <code>music/</code> et <code>effectSound/</code> ",
        "et/ou les joblib de preprocessing. Donc je n’active pas ce pipeline pour éviter de crasher.",
        "<br><br>Si tu veux l’activer : ajoute les joblib de preprocessing + les modèles metadata, ",
        "ou dis-moi comment tu les as nommés/organisés.",
    )
    return html_error(title, "".join(body_parts))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
 
 
 
 
 
505
 
506
+ # ============================================================
507
+ # Page “diagnostic”
508
+ # ============================================================
509
def status_page():
    """Build the HTML diagnostic card shown in the "Diagnostic" tab.

    Reports whether both OpenSMILE models (A) are loaded and whether the
    features-API artifacts (B) are complete, shows a fixed notice for the
    metadata pipeline (C), and lists the files returned by
    ``list_local_files``.

    Returns:
        str: the HTML fragment, with leading/trailing whitespace stripped.
    """
    detected = list_local_files()
    listing = "\n".join(detected) if detected else "(aucun fichier .pkl/.joblib détecté)"

    # Pipeline A needs both models; pipeline B needs an empty problem list.
    opensmile_ready = MODEL_EFFECT_A is not None and MODEL_MUSIC_A is not None
    features_ready = len(PROBLEMS_B) == 0

    # Resolve every dynamic label up front so the template stays readable.
    opensmile_status = "✅ OK" if opensmile_ready else "❌ modèles manquants"
    features_status = "✅ OK" if features_ready else "❌ incomplet"
    effect_path = PATH_EFFECT_A or "non chargé"
    music_path = PATH_MUSIC_A or "non chargé"
    missing_keys = ", ".join(PROBLEMS_B) if PROBLEMS_B else "aucun"

    card = f"""
    <div class="card">
    <div class="card-title">📦 Diagnostic du Space</div>
    <div class="grid">
    <div class="box">
    <div class="box-title">OpenSMILE (A)</div>
    <div class="box-value">{opensmile_status}</div>
    <div class="hint">Effect: <code>{effect_path}</code><br>Music: <code>{music_path}</code></div>
    </div>
    <div class="box">
    <div class="box-title">Features API (B)</div>
    <div class="box-value">{features_status}</div>
    <div class="hint">Manquants: <code>{missing_keys}</code></div>
    </div>
    <div class="box">
    <div class="box-title">Metadata (C)</div>
    <div class="box-value">⚠️ désactivé si dossiers/joblib absents</div>
    <div class="hint">Activer seulement si preprocessing joblib présents.</div>
    </div>
    </div>
    <div class="hint" style="margin-top:12px"><b>Fichiers détectés</b> :</div>
    <pre>{listing}</pre>
    </div>
    """
    return card.strip()
 
 
539
 
540
 
541
  # ============================================================
542
+ # GRADIO UI
543
  # ============================================================
544
+ with gr.Blocks(title="Popularité FreeSound — Pipelines séparés", css=CSS, theme=gr.themes.Soft()) as demo:
545
  gr.HTML(f"""
546
+ <div id="header-title">Popularité FreeSound — Pipelines séparés</div>
547
  <p id="header-sub">
548
+ <b>A)</b> Upload → OpenSMILE<br>
549
+ <b>B)</b> URL → Features acoustiques FreeSound (API fields)<br>
550
+ <b>C)</b> URL → Metadata FreeSound (désactivé si fichiers absents)<br><br>
551
  <b>Durées acceptées :</b> 🔊 {MIN_EFFECT}–{MAX_EFFECT}s · 🎵 {MIN_MUSIC}–{MAX_MUSIC}s
552
  </p>
553
  """)
554
 
555
  with gr.Tabs():
556
+ with gr.Tab("📦 Diagnostic"):
557
+ diag = gr.HTML(value=status_page())
558
+ btn_refresh = gr.Button("Rafraîchir diagnostic")
559
+ btn_refresh.click(lambda: status_page(), outputs=diag)
560
+
561
  with gr.Tab("A) Upload → OpenSMILE"):
562
  with gr.Row():
563
  with gr.Column():
 
580
  with gr.Row():
581
  with gr.Column():
582
  url_in = gr.Textbox(label="URL FreeSound", placeholder="https://freesound.org/s/123456/")
 
583
  btn = gr.Button("🚀 Prédire (Metadata)", variant="primary")
584
  with gr.Column():
585
  out = gr.HTML()
586
+ btn.click(predict_freesound_metadata_stub, inputs=url_in, outputs=out)
587
 
588
  demo.launch()