update app.py
Browse files- app.py +38 -14
- effectSound/ffect_onehot_tags.joblib +3 -0
- music/music_onehot_tags.joblib +3 -0
app.py
CHANGED
|
@@ -43,6 +43,9 @@ est_num_downloads_music = joblib.load("music/est_num_downloads_music.joblib")
|
|
| 43 |
avg_rating_transformer_music = joblib.load("music/avg_rating_transformer_music.joblib")
|
| 44 |
music_subcategory_cols = joblib.load("music/music_subcategory_cols.joblib")
|
| 45 |
music_onehot_cols = joblib.load("music/music_onehot_cols.joblib")
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# EffectSound
|
| 48 |
scaler_samplerate_effect = joblib.load("effectSound/scaler_effectSamplerate.joblib")
|
|
@@ -52,6 +55,7 @@ est_num_downloads_effect = joblib.load("effectSound/est_num_downloads_effectSoun
|
|
| 52 |
avg_rating_transformer_effect = joblib.load("effectSound/avg_rating_transformer_effectSound.joblib")
|
| 53 |
effect_subcategory_cols = joblib.load("effectSound/effectSound_subcategory_cols.joblib")
|
| 54 |
effect_onehot_cols = joblib.load("effectSound/effectSound_onehot_cols.joblib")
|
|
|
|
| 55 |
|
| 56 |
# GloVe pour description
|
| 57 |
glove_model = api.load("glove-wiki-gigaword-100")
|
|
@@ -117,6 +121,7 @@ def preprocess_sound(df):
|
|
| 117 |
avg_rating_transformer = avg_rating_transformer_effect
|
| 118 |
subcat_cols = effect_subcategory_cols
|
| 119 |
onehot_cols = effect_onehot_cols
|
|
|
|
| 120 |
elif 10 <= dur <= 60:
|
| 121 |
dataset_type = "music"
|
| 122 |
scaler_samplerate = scaler_samplerate_music
|
|
@@ -126,6 +131,7 @@ def preprocess_sound(df):
|
|
| 126 |
avg_rating_transformer = avg_rating_transformer_music
|
| 127 |
subcat_cols = music_subcategory_cols
|
| 128 |
onehot_cols = music_onehot_cols
|
|
|
|
| 129 |
else:
|
| 130 |
return f"❌ Son trop court ou trop long ({dur} sec)"
|
| 131 |
|
|
@@ -157,36 +163,54 @@ def preprocess_sound(df):
|
|
| 157 |
|
| 158 |
# Subcategory
|
| 159 |
for col in subcat_cols:
|
| 160 |
-
df[col] = 0 #
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
for col in onehot_cols:
|
| 164 |
-
df[col] = 0
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
# Tags
|
| 167 |
# Tags
|
| 168 |
df["tags_list"] = df["tags"].fillna("").astype(str).str.lower().str.split(",")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
# One-Hot Encoding tags à la volée
|
| 171 |
-
all_tags = [t.strip() for sub in df["tags_list"] for t in sub if t.strip() != ""]
|
| 172 |
-
frequent_tags = set(all_tags) # simplifié, car threshold appliqué dans Colab déjà
|
| 173 |
-
for tag in frequent_tags:
|
| 174 |
-
df[f"tag_{tag.replace(' ','_')}"] = 1
|
| 175 |
-
df.drop(columns=["tags","tags_list"], inplace=True)
|
| 176 |
|
| 177 |
# Name
|
| 178 |
-
df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(
|
| 179 |
vectorizer = HashingVectorizer(n_features=8, alternate_sign=False, norm=None)
|
| 180 |
name_vec = vectorizer.transform(df["name_clean"])
|
| 181 |
for i in range(8):
|
| 182 |
df[f"name_vec_{i}"] = name_vec.toarray()[0][i]
|
| 183 |
df.drop(columns=["name","name_clean"], inplace=True)
|
|
|
|
| 184 |
|
| 185 |
# Description
|
| 186 |
desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
|
| 187 |
for i in range(100):
|
| 188 |
df[f"description_glove_{i}"] = desc_vec[i]
|
| 189 |
df.drop(columns=["description"], inplace=True)
|
|
|
|
| 190 |
|
| 191 |
return df
|
| 192 |
|
|
@@ -202,9 +226,9 @@ def predict_with_metadata(url):
|
|
| 202 |
# 2️⃣ Vérifier la durée
|
| 203 |
dur = df_raw["duration"].iloc[0]
|
| 204 |
if dur < 0.5:
|
| 205 |
-
return raw_str + f"\n\n❌ Son trop court ({dur} sec
|
| 206 |
elif 3 < dur < 10 or dur > 60:
|
| 207 |
-
return raw_str + f"\n\n❌ Son trop long ou hors plage acceptable ({dur} sec ,
|
| 208 |
|
| 209 |
# 3️⃣ Prétraitement seulement si durée ok
|
| 210 |
df_processed = preprocess_sound(df_raw)
|
|
|
|
| 43 |
avg_rating_transformer_music = joblib.load("music/avg_rating_transformer_music.joblib")
|
| 44 |
music_subcategory_cols = joblib.load("music/music_subcategory_cols.joblib")
|
| 45 |
music_onehot_cols = joblib.load("music/music_onehot_cols.joblib")
|
| 46 |
+
# Tag columns the music model expects (used to one-hot encode tags).
# The artifact is committed under "music/", like the other music artifacts
# loaded above, so load it from that directory rather than the repo root.
# NOTE(review): assumes no second copy exists at the repository root — confirm.
music_onehot_tags = joblib.load("music/music_onehot_tags.joblib")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
|
| 50 |
# EffectSound
|
| 51 |
scaler_samplerate_effect = joblib.load("effectSound/scaler_effectSamplerate.joblib")
|
|
|
|
| 55 |
avg_rating_transformer_effect = joblib.load("effectSound/avg_rating_transformer_effectSound.joblib")
|
| 56 |
effect_subcategory_cols = joblib.load("effectSound/effectSound_subcategory_cols.joblib")
|
| 57 |
effect_onehot_cols = joblib.load("effectSound/effectSound_onehot_cols.joblib")
|
| 58 |
+
# Tag columns the effect-sound model expects (used to one-hot encode tags).
# The artifact added with this change lives under "effectSound/" and is named
# "ffect_onehot_tags.joblib" (the leading "e" is missing in the committed
# filename), so the path below matches the file as committed.
# NOTE(review): if the file is renamed to "effect_onehot_tags.joblib",
# update this path in the same change.
effect_onehot_tags = joblib.load("effectSound/ffect_onehot_tags.joblib")
|
| 59 |
|
| 60 |
# GloVe pour description
|
| 61 |
glove_model = api.load("glove-wiki-gigaword-100")
|
|
|
|
| 121 |
avg_rating_transformer = avg_rating_transformer_effect
|
| 122 |
subcat_cols = effect_subcategory_cols
|
| 123 |
onehot_cols = effect_onehot_cols
|
| 124 |
+
onehot_tags = effect_onehot_tags
|
| 125 |
elif 10 <= dur <= 60:
|
| 126 |
dataset_type = "music"
|
| 127 |
scaler_samplerate = scaler_samplerate_music
|
|
|
|
| 131 |
avg_rating_transformer = avg_rating_transformer_music
|
| 132 |
subcat_cols = music_subcategory_cols
|
| 133 |
onehot_cols = music_onehot_cols
|
| 134 |
+
onehot_tags = music_onehot_tags
|
| 135 |
else:
|
| 136 |
return f"❌ Son trop court ou trop long ({dur} sec)"
|
| 137 |
|
|
|
|
| 163 |
|
| 164 |
# Subcategory
|
| 165 |
for col in subcat_cols:
|
| 166 |
+
df[col] = 0 # toutes les colonnes initialisées à 0
|
| 167 |
+
# activer 1 pour la bonne subcategory
|
| 168 |
+
subcat_val = df["subcategory"].iloc[0]
|
| 169 |
+
for col in subcat_cols:
|
| 170 |
+
cat_name = col.replace("subcategory_", "")
|
| 171 |
+
if subcat_val == cat_name:
|
| 172 |
+
df[col] = 1
|
| 173 |
+
df.drop(columns=["subcategory"], inplace=True)
|
| 174 |
+
|
| 175 |
+
# One-hot
|
| 176 |
for col in onehot_cols:
|
| 177 |
+
df[col] = 0 # initialiser à 0
|
| 178 |
+
# Activer la colonne correspondante pour license, category, type si existante
|
| 179 |
+
for col in onehot_cols:
|
| 180 |
+
val = df[col].iloc[0]
|
| 181 |
+
# Si la colonne 'val' existe parmi les colonnes du modèle
|
| 182 |
+
model_col = f"{col}_{val}"
|
| 183 |
+
if model_col in df.columns:
|
| 184 |
+
df[model_col] = 1
|
| 185 |
+
df.drop(columns=onehot_cols, inplace=True)
|
| 186 |
+
|
| 187 |
+
# Tags
|
| 188 |
# Tags
|
| 189 |
# Tags
|
| 190 |
df["tags_list"] = df["tags"].fillna("").astype(str).str.lower().str.split(",")
|
| 191 |
+
for col in onehot_tags:
|
| 192 |
+
tag_name = col.replace("tag_", "").replace("_", " ")
|
| 193 |
+
df[col] = int(tag_name in df["tags_list"].iloc[0])
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
df.drop(columns=["tags_list","tags"], inplace=True)
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
|
| 199 |
# Name
|
| 200 |
+
df["name_clean"] = df["name"].astype(str).str.lower().str.rsplit(".", n=1).str[0]
|
| 201 |
vectorizer = HashingVectorizer(n_features=8, alternate_sign=False, norm=None)
|
| 202 |
name_vec = vectorizer.transform(df["name_clean"])
|
| 203 |
for i in range(8):
|
| 204 |
df[f"name_vec_{i}"] = name_vec.toarray()[0][i]
|
| 205 |
df.drop(columns=["name","name_clean"], inplace=True)
|
| 206 |
+
|
| 207 |
|
| 208 |
# Description
|
| 209 |
desc_vec = description_to_vec(df["description"].iloc[0], glove_model)
|
| 210 |
for i in range(100):
|
| 211 |
df[f"description_glove_{i}"] = desc_vec[i]
|
| 212 |
df.drop(columns=["description"], inplace=True)
|
| 213 |
+
|
| 214 |
|
| 215 |
return df
|
| 216 |
|
|
|
|
| 226 |
# 2️⃣ Vérifier la durée
|
| 227 |
dur = df_raw["duration"].iloc[0]
|
| 228 |
if dur < 0.5:
|
| 229 |
+
return raw_str + f"\n\n❌ Son trop court ({dur} sec), veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s)"
|
| 230 |
elif 3 < dur < 10 or dur > 60:
|
| 231 |
+
return raw_str + f"\n\n❌ Son trop long ou hors plage acceptable ({dur} sec) , veuillez entrer un son qui est court (0.5 à 3 s) ou un son long (10 à 60s))"
|
| 232 |
|
| 233 |
# 3️⃣ Prétraitement seulement si durée ok
|
| 234 |
df_processed = preprocess_sound(df_raw)
|
effectSound/ffect_onehot_tags.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9504d82fd7b4691fdc61b00f2e8ae15e28665fce17c60cf44655ccd60cf09f36
|
| 3 |
+
size 69808
|
music/music_onehot_tags.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8d510dc14604d2d69333e144cc3212ecb3b446d5192f15940347d65610e6eb1
|
| 3 |
+
size 36877
|