NIIHAAD committed on
Commit
7e1871b
·
1 Parent(s): 27b2989

update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -6
app.py CHANGED
@@ -242,6 +242,32 @@ def preprocess_features_full(df):
242
  return df
243
 
244
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # -------- Fonctions --------
247
 
@@ -251,6 +277,8 @@ def predict_with_metadata(url):
251
 
252
  df = fetch_sound_metadata(url)
253
  df = preprocess_targets(df)
 
 
254
  #df_raw = df.copy()
255
  #df = preprocess_targets(df)
256
  #df = preprocess_features_full(df) # <-- version complète
@@ -266,13 +294,10 @@ def predict_with_metadata(url):
266
  print("=== COLONNES AVANT PRÉTRAITEMENT ===")
267
  print(df.columns.tolist())
268
  print(df.dtypes)
269
- return ("🔹 DONNÉES BRUTES (avant traitement)\n"
270
- "----------------------------------\n"
271
- f"{df.columns.tolist()}\n\n"
272
  f"{df.dtypes}"
273
- "OK – TARGETS CALCULÉES\n"
274
- f"avg_rating (discrétisé) : {df['avg_rating'].iloc[0]}\n"
275
- f"num_downloads_class : {df['num_downloads_class'].iloc[0]}"
276
  )
277
 
278
 
 
242
  return df
243
 
244
 
245
def preprocess_features_step1(df, *, user_freq=None, scaler=None):
    """Apply the first feature-engineering pass to a sound-metadata frame.

    The transformation runs on a copy, so the caller's frame is not modified.

    Steps:
        1. Cast ``category_is_user_provided`` to int.
        2. One-hot encode ``license``, ``category`` and ``type``; missing
           values are first replaced by ``"Unknown"``.
        3. Replace ``username`` with a ``username_freq`` column.
        4. Apply ``log1p`` to ``num_ratings``, ``filesize`` and ``duration``.
        5. Standardise ``samplerate``.

    Args:
        df: DataFrame containing every column named above.
        user_freq: Optional mapping (dict or Series) from username to a
            frequency computed beforehand. Usernames absent from it get 0.0.
            When omitted, frequencies are computed from ``df`` itself.
        scaler: Optional already-fitted object exposing ``transform(X)``.
            When omitted, a new ``StandardScaler`` is fitted on ``df``.

    Returns:
        A new DataFrame with the transformed columns; ``username`` is dropped.

    Note:
        With the defaults, steps 3 and 5 derive their statistics from ``df``.
        If ``df`` holds a single row this gives ``username_freq == 1.0`` and
        ``samplerate == 0.0`` whatever the input values are, so pass
        ``user_freq`` and ``scaler`` fitted on the training data when
        transforming new samples.
        NOTE(review): ``get_dummies`` only creates columns for the categories
        present in ``df`` - confirm the column set matches what the model
        expects.
    """
    df = df.copy()

    # 1. Bool -> int
    df["category_is_user_provided"] = df["category_is_user_provided"].astype(int)

    # 2. Simple categoricals -> one-hot
    cat_cols = ["license", "category", "type"]
    df[cat_cols] = df[cat_cols].fillna("Unknown")
    df = pd.get_dummies(df, columns=cat_cols, drop_first=False)

    # 3. username -> frequency encoding
    if user_freq is None:
        # Original behaviour: relative frequency inside this very frame.
        user_freq = df["username"].value_counts(normalize=True)
        df["username_freq"] = df["username"].map(user_freq)
    else:
        # A username missing from the supplied table has never been seen.
        df["username_freq"] = df["username"].map(user_freq).fillna(0.0)
    df = df.drop(columns=["username"])

    # 4. Simple numeric columns: log1p
    for col in ("num_ratings", "filesize", "duration"):
        df[col] = np.log1p(df[col])

    # 5. samplerate -> standardised
    if scaler is None:
        # Original behaviour: fit a fresh scaler on this frame.
        df[["samplerate"]] = StandardScaler().fit_transform(df[["samplerate"]])
    else:
        # Reuse the supplied statistics without refitting.
        df[["samplerate"]] = scaler.transform(df[["samplerate"]])

    return df
270
+
271
 
272
  # -------- Fonctions --------
273
 
 
277
 
278
  df = fetch_sound_metadata(url)
279
  df = preprocess_targets(df)
280
+ df = preprocess_features_step1(df)
281
+
282
  #df_raw = df.copy()
283
  #df = preprocess_targets(df)
284
  #df = preprocess_features_full(df) # <-- version complète
 
294
  print("=== COLONNES AVANT PRÉTRAITEMENT ===")
295
  print(df.columns.tolist())
296
  print(df.dtypes)
297
+ return (
298
+ "OK – FEATURES STEP 1\n"
299
+ f"Nombre de colonnes : {df.shape[1]}\n\n"
300
  f"{df.dtypes}"
 
 
 
301
  )
302
 
303