NIIHAAD committed on
Commit
46adbb9
·
1 Parent(s): 5dad011

update app.py

Browse files
app.py CHANGED
@@ -43,6 +43,9 @@ est_num_downloads_music = joblib.load("music/est_num_downloads_music.joblib")
43
  avg_rating_transformer_music = joblib.load("music/avg_rating_transformer_music.joblib")
44
  music_subcategory_cols = joblib.load("music/music_subcategory_cols.joblib")
45
  music_onehot_cols = joblib.load("music/music_onehot_cols.joblib")
 
 
 
46
 
47
  # EffectSound
48
  scaler_samplerate_effect = joblib.load("effectSound/scaler_effectSamplerate.joblib")
@@ -52,6 +55,7 @@ est_num_downloads_effect = joblib.load("effectSound/est_num_downloads_effectSoun
52
  avg_rating_transformer_effect = joblib.load("effectSound/avg_rating_transformer_effectSound.joblib")
53
  effect_subcategory_cols = joblib.load("effectSound/effectSound_subcategory_cols.joblib")
54
  effect_onehot_cols = joblib.load("effectSound/effectSound_onehot_cols.joblib")
 
55
 
56
  # GloVe pour description
57
  glove_model = api.load("glove-wiki-gigaword-100")
@@ -117,6 +121,7 @@ def preprocess_sound(df):
117
  avg_rating_transformer = avg_rating_transformer_effect
118
  subcat_cols = effect_subcategory_cols
119
  onehot_cols = effect_onehot_cols
 
120
  elif 10 <= dur <= 60:
121
  dataset_type = "music"
122
  scaler_samplerate = scaler_samplerate_music
@@ -126,6 +131,7 @@ def preprocess_sound(df):
126
  avg_rating_transformer = avg_rating_transformer_music
127
  subcat_cols = music_subcategory_cols
128
  onehot_cols = music_onehot_cols
 
129
  else:
130
  return f"❌ Son trop court ou trop long ({dur} sec)"
131
 
@@ -157,36 +163,54 @@ def preprocess_sound(df):
157
 
158
  # Subcategory
159
  for col in subcat_cols:
160
- df[col] = 0 # pour gradio, on va juste créer les colonnes
161
-
162
- # One-hot
 
 
 
 
 
 
 
163
  for col in onehot_cols:
164
- df[col] = 0
165
-
 
 
 
 
 
 
 
 
 
166
  # Tags
167
  # Tags
168
  df["tags_list"] = df["tags"].fillna("").astype(str).str.lower().str.split(",")
 
 
 
 
 
 
169
 
170
- # One-Hot Encoding tags à la volée
171
- all_tags = [t.strip() for sub in df["tags_list"] for t in sub if t.strip() != ""]
172
- frequent_tags = set(all_tags) # simplifié, car threshold appliqué dans Colab déjà
173
- for tag in frequent_tags:
174
- df[f"tag_{tag.replace(' ','_')}"] = 1
175
- df.drop(columns=["tags","tags_list"], inplace=True)
176
 
177
  # Name
178
- df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(pat=".", n=1).str[0]
179
  vectorizer = HashingVectorizer(n_features=8, alternate_sign=False, norm=None)
180
  name_vec = vectorizer.transform(df["name_clean"])
181
  for i in range(8):
182
  df[f"name_vec_{i}"] = name_vec.toarray()[0][i]
183
  df.drop(columns=["name","name_clean"], inplace=True)
 
184
 
185
  # Description
186
  desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
187
  for i in range(100):
188
  df[f"description_glove_{i}"] = desc_vec[i]
189
  df.drop(columns=["description"], inplace=True)
 
190
 
191
  return df
192
 
@@ -202,9 +226,9 @@ def predict_with_metadata(url):
202
  # 2️⃣ Vérifier la durée
203
  dur = df_raw["duration"].iloc[0]
204
  if dur < 0.5:
205
- return raw_str + f"\n\n❌ Son trop court ({dur} sec , veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s)"
206
  elif 3 < dur < 10 or dur > 60:
207
- return raw_str + f"\n\n❌ Son trop long ou hors plage acceptable ({dur} sec , , veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s))"
208
 
209
  # 3️⃣ Prétraitement seulement si durée ok
210
  df_processed = preprocess_sound(df_raw)
 
43
  avg_rating_transformer_music = joblib.load("music/avg_rating_transformer_music.joblib")
44
  music_subcategory_cols = joblib.load("music/music_subcategory_cols.joblib")
45
  music_onehot_cols = joblib.load("music/music_onehot_cols.joblib")
46
+ music_onehot_tags = joblib.load("music_onehot_tags.joblib")
47
+
48
+
49
 
50
  # EffectSound
51
  scaler_samplerate_effect = joblib.load("effectSound/scaler_effectSamplerate.joblib")
 
55
  avg_rating_transformer_effect = joblib.load("effectSound/avg_rating_transformer_effectSound.joblib")
56
  effect_subcategory_cols = joblib.load("effectSound/effectSound_subcategory_cols.joblib")
57
  effect_onehot_cols = joblib.load("effectSound/effectSound_onehot_cols.joblib")
58
+ effect_onehot_tags = joblib.load("effect_onehot_tags.joblib")
59
 
60
  # GloVe pour description
61
  glove_model = api.load("glove-wiki-gigaword-100")
 
121
  avg_rating_transformer = avg_rating_transformer_effect
122
  subcat_cols = effect_subcategory_cols
123
  onehot_cols = effect_onehot_cols
124
+ onehot_tags = effect_onehot_tags
125
  elif 10 <= dur <= 60:
126
  dataset_type = "music"
127
  scaler_samplerate = scaler_samplerate_music
 
131
  avg_rating_transformer = avg_rating_transformer_music
132
  subcat_cols = music_subcategory_cols
133
  onehot_cols = music_onehot_cols
134
+ onehot_tags = music_onehot_tags
135
  else:
136
  return f"❌ Son trop court ou trop long ({dur} sec)"
137
 
 
163
 
164
  # Subcategory
165
  for col in subcat_cols:
166
+ df[col] = 0 # toutes les colonnes initialisées à 0
167
+ # activer 1 pour la bonne subcategory
168
+ subcat_val = df["subcategory"].iloc[0]
169
+ for col in subcat_cols:
170
+ cat_name = col.replace("subcategory_", "")
171
+ if subcat_val == cat_name:
172
+ df[col] = 1
173
+ df.drop(columns=["subcategory"], inplace=True)
174
+
175
+ # One-hot
176
  for col in onehot_cols:
177
+ df[col] = 0 # initialiser à 0
178
+ # Activer la colonne correspondante pour license, category, type si existante
179
+ for col in onehot_cols:
180
+ val = df[col].iloc[0]
181
+ # Si la colonne 'val' existe parmi les colonnes du modèle
182
+ model_col = f"{col}_{val}"
183
+ if model_col in df.columns:
184
+ df[model_col] = 1
185
+ df.drop(columns=onehot_cols, inplace=True)
186
+
187
+ # Tags
188
  # Tags
189
  # Tags
190
  df["tags_list"] = df["tags"].fillna("").astype(str).str.lower().str.split(",")
191
+ for col in onehot_tags:
192
+ tag_name = col.replace("tag_", "").replace("_", " ")
193
+ df[col] = int(tag_name in df["tags_list"].iloc[0])
194
+
195
+
196
+ df.drop(columns=["tags_list","tags"], inplace=True)
197
 
 
 
 
 
 
 
198
 
199
  # Name
200
+ df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
201
  vectorizer = HashingVectorizer(n_features=8, alternate_sign=False, norm=None)
202
  name_vec = vectorizer.transform(df["name_clean"])
203
  for i in range(8):
204
  df[f"name_vec_{i}"] = name_vec.toarray()[0][i]
205
  df.drop(columns=["name","name_clean"], inplace=True)
206
+
207
 
208
  # Description
209
  desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
210
  for i in range(100):
211
  df[f"description_glove_{i}"] = desc_vec[i]
212
  df.drop(columns=["description"], inplace=True)
213
+
214
 
215
  return df
216
 
 
226
  # 2️⃣ Vérifier la durée
227
  dur = df_raw["duration"].iloc[0]
228
  if dur < 0.5:
229
+ return raw_str + f"\n\n❌ Son trop court ({dur} sec), veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s)"
230
  elif 3 < dur < 10 or dur > 60:
231
+ return raw_str + f"\n\n❌ Son trop long ou hors plage acceptable ({dur} sec) , veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s))"
232
 
233
  # 3️⃣ Prétraitement seulement si durée ok
234
  df_processed = preprocess_sound(df_raw)
effectSound/ffect_onehot_tags.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9504d82fd7b4691fdc61b00f2e8ae15e28665fce17c60cf44655ccd60cf09f36
3
+ size 69808
music/music_onehot_tags.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8d510dc14604d2d69333e144cc3212ecb3b446d5192f15940347d65610e6eb1
3
+ size 36877