IKRAMELHADI committed on
Commit
0689a72
·
1 Parent(s): b27102c

Add demo interface + models

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. app.py +170 -297
  3. requirements.txt +7 -6
  4. xgb_model_EffectSound.pkl +3 -0
  5. xgb_model_Music.pkl +3 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .DS_Store
app.py CHANGED
@@ -1,307 +1,180 @@
1
- import gradio as gr
2
  import os
3
- import pandas as pd
4
  import numpy as np
5
- from sklearn.preprocessing import KBinsDiscretizer, StandardScaler, OneHotEncoder
6
- from sklearn.feature_extraction.text import HashingVectorizer
7
- from collections import Counter
8
  import joblib
9
- import freesound
10
- import gensim.downloader as api
11
-
12
- # -------- FreeSound API --------
13
- client = freesound.FreesoundClient()
14
- client.set_token("zE9NjEOgUMzH9K7mjiGBaPJiNwJLjSM53LevarRK", "token")
15
-
16
- dataset_dir = "dataset_audio"
17
- os.makedirs(dataset_dir, exist_ok=True)
18
-
19
- class AvgRatingTransformer:
20
- def __init__(self, est, class_mapping=None):
21
- self.est = est
22
- if class_mapping is None:
23
- self.class_mapping = {0:"MissedInfo", 1:"Low", 2:"Medium", 3:"High"}
24
- else:
25
- self.class_mapping = class_mapping
26
-
27
- def transform(self, X):
28
- X = X.copy()
29
- mask_non_zero = X != 0
30
- Xt = np.zeros_like(X, dtype=int)
31
- if mask_non_zero.any():
32
- Xt[mask_non_zero] = self.est.transform(X[mask_non_zero].reshape(-1,1)).flatten() + 1
33
- X_transformed = np.array([self.class_mapping.get(v, "MissedInfo") for v in Xt])
34
- return X_transformed
35
-
36
-
37
- # -------- Charger les objets sauvegardés --------
38
- # Music
39
- scaler_samplerate_music = joblib.load("music/scaler_music_samplerate.joblib")
40
- scaler_age_days_music = joblib.load("music/scaler_music_age_days_log.joblib")
41
- username_freq_music = joblib.load("music/username_freq_dict_music.joblib")
42
- est_num_downloads_music = joblib.load("music/est_num_downloads_music.joblib")
43
- avg_rating_transformer_music = joblib.load("music/avg_rating_transformer_music.joblib")
44
- music_subcategory_cols = joblib.load("music/music_subcategory_cols.joblib")
45
- music_onehot_cols = joblib.load("music/music_onehot_cols.joblib")
46
- music_onehot_tags = joblib.load("music/music_onehot_tags.joblib")
47
-
48
-
49
-
50
- # EffectSound
51
- scaler_samplerate_effect = joblib.load("effectSound/scaler_effectSamplerate.joblib")
52
- scaler_age_days_effect = joblib.load("effectSound/scaler_effectSound_age_days_log.joblib")
53
- username_freq_effect = joblib.load("effectSound/username_freq_dict_effectSound.joblib")
54
- est_num_downloads_effect = joblib.load("effectSound/est_num_downloads_effectSound.joblib")
55
- avg_rating_transformer_effect = joblib.load("effectSound/avg_rating_transformer_effectSound.joblib")
56
- effect_subcategory_cols = joblib.load("effectSound/effectSound_subcategory_cols.joblib")
57
- effect_onehot_cols = joblib.load("effectSound/effectSound_onehot_cols.joblib")
58
- effect_onehot_tags = joblib.load("effectSound/effect_onehot_tags.joblib")
59
-
60
- # GloVe pour description
61
- glove_model = api.load("glove-wiki-gigaword-100")
62
-
63
- # -------- Fonctions --------
64
-
65
-
66
-
67
- def fetch_sound_metadata(sound_url):
68
- """Télécharge les métadonnées du son FreeSound"""
69
- sound_id = int(sound_url.rstrip("/").split("/")[-1])
70
- sound = client.get_sound(sound_id)
71
- file_name = f"{sound.name.replace(' ', '_')}.mp3"
72
- file_path = os.path.join(dataset_dir, file_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  try:
74
- sound.retrieve_preview(dataset_dir, file_name)
75
  except Exception as e:
76
- print(f"Erreur téléchargement {file_name}: {e}")
77
- file_path = None
78
- data = {
79
- "file_path": file_path,
80
- "name": sound.name,
81
- "num_ratings": sound.num_ratings,
82
- "tags": ",".join(sound.tags) if getattr(sound, "tags", None) else "",
83
- "username": sound.username,
84
- "description": sound.description if sound.description else "",
85
- "created": getattr(sound, "created", ""),
86
- "license": getattr(sound, "license", ""),
87
- "num_downloads": getattr(sound, "num_downloads", 0),
88
- "channels": getattr(sound, "channels", 0),
89
- "filesize": getattr(sound, "filesize", 0),
90
- "num_comments": getattr(sound, "num_comments", 0),
91
- "category_is_user_provided": getattr(sound, "category_is_user_provided", 0),
92
- "duration": getattr(sound, "duration", 0),
93
- "avg_rating": getattr(sound, "avg_rating", 0),
94
- "category": getattr(sound, "category", "Unknown"),
95
- "subcategory": getattr(sound, "subcategory", "Other"),
96
- "type": getattr(sound, "type", ""),
97
- "samplerate": getattr(sound, "samplerate", 0)
98
- }
99
- return pd.DataFrame([data])
100
-
101
- def description_to_vec(text, model, dim=100):
102
- if not text:
103
- return np.zeros(dim)
104
- words = text.lower().split()
105
- vecs = [model[w] for w in words if w in model]
106
- if len(vecs) == 0:
107
- return np.zeros(dim)
108
- return np.mean(vecs, axis=0)
109
-
110
- def preprocess_sound(df):
111
- """Applique le preprocessing complet selon duration pour choisir music ou effectSound"""
112
- df = df.copy()
113
- dur = df["duration"].iloc[0]
114
-
115
- if 0.5 <= dur <= 3:
116
- dataset_type = "effectSound"
117
- scaler_samplerate = scaler_samplerate_effect
118
- scaler_age = scaler_age_days_effect
119
- username_freq = username_freq_effect
120
- est_num_downloads = est_num_downloads_effect
121
- avg_rating_transformer = avg_rating_transformer_effect
122
- subcat_cols = effect_subcategory_cols
123
- onehot_cols = effect_onehot_cols
124
- onehot_tags = effect_onehot_tags
125
- elif 10 <= dur <= 60:
126
- dataset_type = "music"
127
- scaler_samplerate = scaler_samplerate_music
128
- scaler_age = scaler_age_days_music
129
- username_freq = username_freq_music
130
- est_num_downloads = est_num_downloads_music
131
- avg_rating_transformer = avg_rating_transformer_music
132
- subcat_cols = music_subcategory_cols
133
- onehot_cols = music_onehot_cols
134
- onehot_tags = music_onehot_tags
135
  else:
136
- return f"❌ Son trop court ou trop long ({dur} sec)"
137
-
138
- # ----------------- Features -----------------
139
- # Category bool
140
- df["category_is_user_provided"] = df["category_is_user_provided"].astype(int)
141
-
142
-
143
- # Username frequency
144
- df["username_freq"] = df["username"].map(username_freq).fillna(0)
145
-
146
- # Numeric features
147
- for col in ["num_ratings", "num_comments", "filesize", "duration"]:
148
- df[col] = np.log1p(df[col])
149
- df["samplerate"] = scaler_samplerate.transform(df[["samplerate"]])
150
-
151
- # Age_days
152
- df["created"] = pd.to_datetime(df["created"], errors="coerce").dt.tz_localize(None)
153
- df["age_days"] = (pd.Timestamp.now() - df["created"]).dt.days
154
- df["age_days_log"] = np.log1p(df["age_days"])
155
- df["age_days_log_scaled"] = scaler_age.transform(df[["age_days_log"]])
156
- df = df.drop(columns=["created", "age_days", "age_days_log"])
157
-
158
- # num_downloads
159
- df["num_downloads_class"] = est_num_downloads.transform(df[["num_downloads"]])
160
-
161
- # avg_rating
162
- df["avg_rating"] = avg_rating_transformer.transform(df["avg_rating"].to_numpy())
163
-
164
- # Subcategory
165
- for col in subcat_cols:
166
- df[col] = 0 # toutes les colonnes initialisées à 0
167
- # activer 1 pour la bonne subcategory
168
- subcat_val = df["subcategory"].iloc[0]
169
- for col in subcat_cols:
170
- cat_name = col.replace("subcategory_", "")
171
- if subcat_val == cat_name:
172
- df[col] = 1
173
- df.drop(columns=["subcategory"], inplace=True)
174
-
175
-
176
-
177
- # créer toutes les colonnes attendues à 0
178
- for col in onehot_cols:
179
- if col not in df.columns:
180
- df[col] = 0
181
-
182
- # activer les bonnes colonnes one-hot
183
- license_val = df.loc[0, "license"]
184
- category_val = df.loc[0, "category"]
185
- type_val = df.loc[0, "type"]
186
-
187
- for col_name in [
188
- f"license_{license_val}",
189
- f"category_{category_val}",
190
- f"type_{type_val}",
191
- ]:
192
- if col_name in df.columns:
193
- df[col_name] = 1
194
-
195
-
196
-
197
-
198
-
199
- # Tags
200
- # Si la colonne "tags" n'existe pas, on la crée avec une valeur vide
201
- for col in ["name", "tags", "description"]:
202
- if col not in df.columns:
203
- df[col] = ""
204
-
205
- df["tags_list"] = df["tags"].fillna("").astype(str).str.lower().str.split(",")
206
-
207
- # Si aucun tag n'existe ou que la liste est vide, mettre "Other"
208
- if not df["tags_list"].iloc[0] or df["tags_list"].iloc[0] == [""]:
209
- df["tags_list"] = [["Other"]]
210
-
211
- # One-hot sur toutes les colonnes enregistrées
212
- for col in onehot_tags:
213
- tag_name = col.replace("tag_", "").replace("_", " ")
214
- df[col] = int(tag_name in df["tags_list"].iloc[0])
215
-
216
- # Supprimer les colonnes temporaires
217
- df.drop(columns=["tags_list", "tags"], inplace=True)
218
-
219
-
220
- # Name
221
- df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
222
- vectorizer = HashingVectorizer(n_features=8, alternate_sign=False, norm=None)
223
- name_vec = vectorizer.transform(df["name_clean"])
224
- for i in range(8):
225
- df[f"name_vec_{i}"] = name_vec.toarray()[0][i]
226
- df.drop(columns=["name","name_clean"], inplace=True)
227
-
228
-
229
- # Description
230
- desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
231
- for i in range(100):
232
- df[f"description_glove_{i}"] = desc_vec[i]
233
- df.drop(columns=["description"], inplace=True)
234
-
235
-
236
- df.drop(columns=[ "license","category","type","created","subcategory","id","num_downloads","file_path","username"],inplace=True, errors="ignore")
237
-
238
- # --- SAFE REORDER (CRUCIAL) ---
239
-
240
- final_cols = []
241
-
242
- for col in onehot_cols:
243
- if col in df.columns:
244
- final_cols.append(col)
245
-
246
- # subcategories
247
- for col in subcat_cols:
248
- if col in df.columns:
249
- final_cols.append(col)
250
-
251
- # le reste
252
- final_cols += [c for c in df.columns if c not in final_cols]
253
-
254
- df = df[final_cols]
255
-
256
-
257
-
258
-
259
- return df
260
-
261
- # -------- Gradio --------
262
- def predict_with_metadata(url):
263
- if url.strip() == "":
264
- return "❌ Veuillez entrer une URL FreeSound."
265
-
266
- # 1️ Récupérer les métadonnées brutes
267
- df_raw = fetch_sound_metadata(url)
268
-
269
- # Affichage ligne par ligne pour les métadonnées brutes
270
- raw_lines = ["=== Métadonnées brutes ==="]
271
- for col in df_raw.columns:
272
- raw_lines.append(f"{col}: {df_raw[col].iloc[0]}")
273
- raw_str = "\n".join(raw_lines)
274
 
275
- # 2️ Vérifier la durée
276
- dur = df_raw["duration"].iloc[0]
277
- if dur < 0.5:
278
- return raw_str + f"\n\n Son trop court ({dur} sec), veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60 s)"
279
- elif 3 < dur < 10 or dur > 60:
280
- return raw_str + f"\n\n Son trop long ou hors plage acceptable ({dur} sec) , veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60 s)"
281
-
282
- # 3️ Prétraitement seulement si durée ok
283
- df_processed = preprocess_sound(df_raw)
284
-
285
- # Affichage ligne par ligne pour les features après preprocessing
286
- processed_lines = ["\n=== Features après preprocessing ==="]
287
- for col in df_processed.columns:
288
- processed_lines.append(f"{col}: {df_processed[col].iloc[0]}")
289
- processed_str = "\n".join(processed_lines)
290
-
291
- return raw_str + processed_str
292
-
293
-
294
-
295
-
296
-
297
- with gr.Blocks(title="FreeSound Popularity Detector") as demo:
298
- gr.Markdown("# 🎧 FreeSound Popularity Detector")
299
- gr.Markdown("Collez l'URL d'un son FreeSound et le preprocessing complet sera appliqué automatiquement.")
300
-
301
- url_input = gr.Textbox(label="URL du son FreeSound")
302
- btn_meta = gr.Button("📊 Prétraiter et afficher features")
303
- output = gr.Textbox(label="Résultat")
304
 
305
- btn_meta.click(fn=predict_with_metadata, inputs=url_input, outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  demo.launch()
 
 
1
  import os
2
+ import tempfile
3
  import numpy as np
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
  import joblib
8
+ import soundfile as sf
9
+ from pydub import AudioSegment
10
+
11
+ import opensmile
12
+
13
+
14
+ # =========================
15
+ # Config
16
+ # =========================
17
# Sample rate that audio is converted to before openSMILE extraction.
SR_TARGET = 16000

# Accepted duration windows, in seconds, for each of the two models.
MIN_EFFECT = 0.5
MAX_EFFECT = 3.0
MIN_MUSIC = 10.0
MAX_MUSIC = 60.0

MODEL_EFFECT_PATH = "xgb_model_EffectSound.pkl"
MODEL_MUSIC_PATH = "xgb_model_Music.pkl"

# openSMILE extractor (same setup as the training script): eGeMAPSv02
# feature set, computed at the Functionals level.
SMILE = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)

# Load both models once at import time. They are deliberately not printed,
# to avoid problems with their repr.
MODEL_EFFECT = joblib.load(MODEL_EFFECT_PATH)
MODEL_MUSIC = joblib.load(MODEL_MUSIC_PATH)
33
+
34
+
35
+ # =========================
36
+ # Helpers audio
37
+ # =========================
38
def get_duration_seconds(filepath: str) -> float:
    """Return the duration, in seconds, of the audio file at ``filepath``.

    Files with an ``.mp3`` extension are decoded with pydub and their length
    (taken here as milliseconds) is divided by 1000. Every other extension
    is opened with soundfile and the duration is the frame count divided by
    the sample rate.
    """
    _, suffix = os.path.splitext(filepath)
    if suffix.lower() != ".mp3":
        # wav / flac / ogg ...
        with sf.SoundFile(filepath) as handle:
            return len(handle) / float(handle.samplerate)

    segment = AudioSegment.from_file(filepath)
    return len(segment) / 1000.0
46
+
47
def to_wav_16k_mono(filepath: str) -> str:
    """Return the path of a mono WAV at ``SR_TARGET`` for openSMILE.

    If ``filepath`` is already a ``.wav`` file with sample rate ``SR_TARGET``
    and a single channel, the input path itself is returned unchanged.
    Otherwise the audio is decoded with pydub, converted to one channel at
    ``SR_TARGET`` and exported to a new temporary ``.wav`` file.

    Returns:
        Either ``filepath`` itself or the path of a newly created temporary
        file. In the latter case the caller is responsible for deleting it.

    Raises:
        Any exception raised by pydub while decoding or exporting. If the
        export fails, the temporary file is removed before re-raising.
    """
    ext = os.path.splitext(filepath)[1].lower()

    # A WAV that already matches the target format can be used as-is.
    if ext == ".wav":
        try:
            with sf.SoundFile(filepath) as f:
                if f.samplerate == SR_TARGET and f.channels == 1:
                    return filepath
        except Exception:
            # Deliberate best effort: a WAV that soundfile cannot inspect
            # simply goes through the pydub conversion below.
            pass

    audio = AudioSegment.from_file(filepath)
    audio = audio.set_channels(1).set_frame_rate(SR_TARGET)

    # delete=False + close(): only reserve a unique name; the export below
    # reopens the path itself.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()
    try:
        audio.export(tmp.name, format="wav")
    except Exception:
        # Do not leave an orphaned temporary file behind on a failed export.
        try:
            os.remove(tmp.name)
        except OSError:
            pass
        raise
    return tmp.name
70
+
71
+
72
def extract_opensmile_features(filepath: str) -> pd.DataFrame:
    """Extract openSMILE features from ``filepath`` as a numeric DataFrame.

    The audio is first passed through ``to_wav_16k_mono`` and the resulting
    WAV is processed with the module-level ``SMILE`` extractor. Only numeric
    columns are kept and the index is reset to a plain range index.

    Any temporary WAV created by the conversion is deleted before returning,
    whether or not the extraction succeeds. The input file itself is never
    removed.
    """
    wav_path = to_wav_16k_mono(filepath)
    try:
        feats = SMILE.process_file(wav_path)
    finally:
        # to_wav_16k_mono returns the input path unchanged when no conversion
        # was needed; only a converted copy is ours to delete.
        if wav_path != filepath:
            try:
                os.remove(wav_path)
            except OSError:
                # Cleanup is best effort; a failure here must not mask the
                # extraction result or its exception.
                pass

    # Cleanup: keep only the numeric columns.
    feats = feats.select_dtypes(include=[np.number]).copy()
    feats.reset_index(drop=True, inplace=True)
    return feats
80
+
81
+
82
+ # =========================
83
+ # Prediction
84
+ # =========================
85
def _format_prediction(y, dataset_type, dur):
    """Turn the raw model output ``y`` into the message shown to the user.

    When ``y`` exposes at least two values (columns of the first row of a
    2-D output, or the first two entries of a 1-D output) they are reported
    as avg_rating and num_downloads, in that order. Any other shape is
    reported verbatim as the raw prediction.
    """
    y = np.asarray(y)

    if y.ndim == 2 and y.shape[1] >= 2:
        pred_avg_rating, pred_num_downloads = y[0, 0], y[0, 1]
    elif y.ndim == 1 and y.shape[0] >= 2:
        pred_avg_rating, pred_num_downloads = y[0], y[1]
    else:
        return f"✅ Type: {dataset_type} | Durée: {dur:.2f}s\n\nPrédiction brute: {y}"

    return (
        f"✅ Type détecté : **{dataset_type}**\n"
        f"⏱️ Durée : **{dur:.2f} s**\n\n"
        f"📈 **avg_rating (prédit)** : {pred_avg_rating}\n"
        f"⬇️ **num_downloads (prédit)** : {pred_num_downloads}"
    )


def predict_popularity(audio_file):
    """Predict popularity figures for an uploaded audio file.

    Args:
        audio_file: Path supplied by Gradio (``type="filepath"``), or
            ``None`` when nothing was uploaded.

    Returns:
        A user-facing message: either the prediction or a description of
        what went wrong. Errors are reported in the returned text rather
        than raised, so the UI always has something to display.

    The duration selects the model: ``MIN_EFFECT``..``MAX_EFFECT`` seconds
    uses the SoundEffect model, ``MIN_MUSIC``..``MAX_MUSIC`` seconds uses the
    Music model, and any other duration is rejected.
    """
    if audio_file is None:
        return "❌ Merci d’uploader un fichier audio."

    try:
        dur = get_duration_seconds(audio_file)
    except Exception as e:
        return f" Impossible de lire l’audio : {e}"

    accepted_ranges = (
        f"Plages acceptées :\n"
        f"- SoundEffect : {MIN_EFFECT} à {MAX_EFFECT} secondes\n"
        f"- Music : {MIN_MUSIC} à {MAX_MUSIC} secondes"
    )

    # Match each accepted window explicitly so that no unexpected duration
    # can fall through to a model by default.
    if MIN_EFFECT <= dur <= MAX_EFFECT:
        dataset_type, model = "SoundEffect", MODEL_EFFECT
    elif MIN_MUSIC <= dur <= MAX_MUSIC:
        dataset_type, model = "Music", MODEL_MUSIC
    elif dur < MIN_EFFECT:
        return f"❌ Audio trop court ({dur:.2f}s).\n\n" + accepted_ranges
    else:
        return f" Audio trop long / hors plage ({dur:.2f}s).\n\n" + accepted_ranges

    # openSMILE feature extraction.
    try:
        X = extract_opensmile_features(audio_file)
    except Exception as e:
        return f"❌ Erreur extraction openSMILE : {e}"

    # Prediction.
    try:
        y = model.predict(X)
    except Exception as e:
        return (
            "❌ Erreur pendant la prédiction.\n\n"
            f"Détail: {e}\n\n"
            "👉 Si ça arrive sur Space, c’est souvent un souci de versions (sklearn/xgboost). "
            "Vérifiez les versions dans requirements.txt."
        )

    # The output may be shaped (1, 2) or otherwise; the formatter copes.
    return _format_prediction(y, dataset_type, dur)
159
+
160
+
161
+ # =========================
162
+ # UI Gradio
163
+ # =========================
164
# Single-page UI: upload an audio file, click the button, read the result.
with gr.Blocks(title="Popularity Predictor (openSMILE)") as demo:
    gr.Markdown("# 🎧 Popularity Predictor")
    gr.Markdown(
        "Upload un audio. Si la durée est dans l’une des plages, "
        "on extrait les features openSMILE (eGeMAPS) puis on prédit **avg_rating** et **num_downloads**.\n\n"
        f"- SoundEffect: **{MIN_EFFECT}–{MAX_EFFECT}s**\n"
        f"- Music: **{MIN_MUSIC}–{MAX_MUSIC}s**"
    )

    # type="filepath": the callback receives a path on disk, not raw samples.
    audio_in = gr.Audio(label="Uploader un audio", type="filepath")
    btn = gr.Button("🚀 Prédire")

    # predict_popularity separates some lines with a single "\n". Without
    # line_breaks=True Markdown would merge those lines into one.
    out = gr.Markdown(line_breaks=True)

    btn.click(fn=predict_popularity, inputs=audio_in, outputs=out)

demo.launch()
requirements.txt CHANGED
@@ -1,8 +1,9 @@
1
- gradio==6.5.0
2
- scikit-learn
3
  numpy
4
  pandas
5
- gensim
6
- pytz
7
- git+https://github.com/MTG/freesound-python
8
-
 
 
 
1
+ gradio
 
2
  numpy
3
  pandas
4
+ joblib
5
+ soundfile
6
+ pydub
7
+ opensmile
8
+ scikit-learn==1.8.0
9
+ xgboost
xgb_model_EffectSound.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41317a1a2ac6916e2fc40a8a43097021520ea0de78632149a30ee946b1c697a
3
+ size 16161360
xgb_model_Music.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89dc204e1e774da5b44df74d25d654bce417e4d7304b3bf2efde901dccaf2919
3
+ size 16904032