File size: 4,472 Bytes
1f32474
 
 
 
 
 
95510d5
 
 
1f32474
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95510d5
 
 
 
 
 
 
 
 
1f32474
 
 
4ba1f61
 
 
4eeb3ef
 
95510d5
 
 
4ba1f61
4eeb3ef
f2cbe9e
 
 
4ba1f61
f2cbe9e
403bad7
f2cbe9e
 
18ef4de
4ba1f61
4eeb3ef
 
4ba1f61
4eeb3ef
 
4ba1f61
 
 
 
95510d5
 
 
 
 
 
4ba1f61
 
 
 
95510d5
 
 
 
 
 
4ba1f61
95510d5
 
 
 
aea812d
699fbef
1f32474
95510d5
1f32474
 
 
 
 
95510d5
1f32474
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import os

import freesound
import gradio as gr
import joblib
import pandas as pd
import xgboost as xgb

# ----------------------------
# Config Freesound
# ----------------------------
# SECURITY: a credential committed to source control is readable by anyone
# with access to the repository. Supply the token through the
# FREESOUND_API_TOKEN environment variable instead; the literal below is kept
# only as a backward-compatible fallback and should be rotated, then removed.
# `or` (rather than a .get() default) also covers a variable set to "".
API_TOKEN = (
    os.environ.get("FREESOUND_API_TOKEN")
    or "zE9NjEOgUMzH9K7mjiGBaPJiNwJLjSM53LevarRK"
)

# Shared Freesound client, authenticated with token-based auth.
client = freesound.FreesoundClient()
client.set_token(API_TOKEN, "token")

# ----------------------------
# 1️⃣ Charger les modèles
# ----------------------------
# Music: the download-count model with the feature-name list it is fed, then
# the average-rating model with its feature-name list and label encoder.
xgb_music_num, xgb_music_feat_num = (
    joblib.load("xgb_num_downloads_music_model.pkl"),
    joblib.load("xgb_num_downloads_music_features.pkl"),
)
xgb_music_avg, xgb_music_feat_avg, le_music_avg = (
    joblib.load("xgb_avg_rating_music_model.pkl"),
    joblib.load("xgb_avg_rating_music_features.pkl"),
    joblib.load("xgb_avg_rating_music_label_encoder.pkl"),
)

# Effect sound: same layout as the music artifacts above.
xgb_effect_num, xgb_effect_feat_num = (
    joblib.load("xgb_num_downloads_effectsound_model.pkl"),
    joblib.load("xgb_num_downloads_effectsound_features.pkl"),
)
xgb_effect_avg, xgb_effect_feat_avg, le_effect_avg = (
    joblib.load("xgb_avg_rating_effectsound_model.pkl"),
    joblib.load("xgb_avg_rating_effectsound_features.pkl"),
    joblib.load("xgb_avg_rating_effectsound_label_encoder.pkl"),
)

# ----------------------------
# 2️⃣ Fonctions utilitaires
# ----------------------------
def safe_float(v):
    """Convert *v* to ``float``, falling back to ``0.0`` when it cannot be.

    Args:
        v: Any value; typically a number, a numeric string or ``None``.

    Returns:
        ``float(v)`` when the conversion succeeds, otherwise ``0.0``.
    """
    try:
        return float(v)
    # Only the failures float() itself reports are absorbed: wrong type,
    # unparsable text, or an int too large for a float. A bare ``except``
    # would also swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        return 0.0

def predict_with_model(model, features, feat_list, le=None):
    """Predict one class for a single sound through the model's booster.

    Args:
        model: Fitted estimator exposing ``get_booster()``; the prediction is
            made on the underlying booster with an explicit ``DMatrix``.
        features: Mapping (anything with ``.get``) of feature name to raw
            value for the sound.
        feat_list: Ordered feature names; fixes both the column order and the
            ``feature_names`` given to the ``DMatrix``.
        le: Optional encoder with ``inverse_transform``; when given, the
            predicted index is mapped back to its original label.

    Returns:
        The predicted class index as ``int``, or the decoded label when *le*
        is provided.
    """
    # Missing, null and non-scalar (list/dict) values all collapse to 0
    # before the numeric coercion.
    row = [
        safe_float(0 if value is None or isinstance(value, (list, dict)) else value)
        for value in (features.get(col, 0) for col in feat_list)
    ]

    X = pd.DataFrame([row], columns=feat_list)
    dmatrix = xgb.DMatrix(X.values, feature_names=feat_list)

    raw = model.get_booster().predict(dmatrix)[0]
    if getattr(raw, "ndim", 0) > 0 and raw.size > 1:
        # A booster trained with a per-class probability objective
        # (multi:softprob) yields one score per class for the row; int() on
        # that vector raises, so take the most probable class instead.
        pred_int = int(raw.argmax())
    else:
        # Scalar output is used as the class index directly.
        # NOTE(review): if a model was trained with binary:logistic this
        # scalar is a probability and int() truncates it to 0 - confirm the
        # objectives the pickled models were trained with.
        pred_int = int(raw)

    # Identity test: the encoder's truthiness says nothing about whether one
    # was supplied.
    if le is not None:
        return le.inverse_transform([pred_int])[0]
    return pred_int

# ----------------------------
# 2️⃣ Mapping Num_downloads
# ----------------------------
# Display label for each predicted download-count class index; the position
# in the tuple is the class index (0 -> "Low", 1 -> "Medium", 2 -> "High").
NUM_DOWNLOADS_MAP = dict(enumerate(("Low", "Medium", "High")))

# ----------------------------
# 3️⃣ Extraction + prédiction
# ----------------------------
def _parse_sound_id(url):
    """Return the numeric sound id that ends the path of a Freesound URL."""
    # Drop surrounding whitespace and any "#fragment" / "?query" suffix so a
    # URL pasted from a browser still ends with the id segment.
    path = url.strip().split("#", 1)[0].split("?", 1)[0]
    return int(path.rstrip("/").rsplit("/", 1)[-1])


def _models_for_duration(duration):
    """Pick the model set matching *duration*, or ``None`` if none applies.

    Returns a tuple ``(type_label, num_model, num_features, avg_model,
    avg_features, avg_label_encoder)``.
    """
    if 0.5 <= duration <= 3:
        return (
            "Effect Sound",
            xgb_effect_num,
            xgb_effect_feat_num,
            xgb_effect_avg,
            xgb_effect_feat_avg,
            le_effect_avg,
        )
    if 10 <= duration <= 60:
        return (
            "Music",
            xgb_music_num,
            xgb_music_feat_num,
            xgb_music_avg,
            xgb_music_feat_avg,
            le_music_avg,
        )
    return None


def extract_and_predict(url):
    """Fetch a Freesound sound by URL and predict its downloads and rating.

    Args:
        url: Freesound sound URL whose last path segment is the numeric sound
            id. Surrounding whitespace, a query string or a fragment are
            ignored.

    Returns:
        A one-row ``pandas.DataFrame``. On success it has the columns
        ``Type``, ``Duration``, ``Num_downloads`` and ``Avg_rating``. On
        failure it has an ``Erreur`` column (plus ``Duration`` when the sound
        length is outside the ranges the models cover). No exception derived
        from ``Exception`` escapes this function.
    """
    try:
        sound_id = _parse_sound_id(url)

        # Request "duration" plus every feature any model needs.
        # dict.fromkeys de-duplicates while keeping a stable order, unlike
        # set(), and avoids asking for "duration" twice.
        wanted = dict.fromkeys([
            "duration",
            *xgb_music_feat_num,
            *xgb_music_feat_avg,
            *xgb_effect_feat_num,
            *xgb_effect_feat_avg,
        ])
        fields = ",".join(wanted)

        results = client.search(
            query="",
            filter=f"id:{sound_id}",
            fields=fields,
        )

        if not results.results:
            return pd.DataFrame([{"Erreur": "Sound not found"}])

        sound = results.results[0]
        duration = safe_float(sound.get("duration", 0))

        # The sound length decides which model family applies: 0.5-3 s is
        # treated as an effect sound, 10-60 s as music.
        selected = _models_for_duration(duration)
        if selected is None:
            return pd.DataFrame([{
                "Erreur": "Durée non supportée pour prédiction",
                "Duration": duration,
            }])

        type_label, num_model, num_feats, avg_model, avg_feats, encoder = selected
        num = predict_with_model(num_model, sound, num_feats)
        avg = predict_with_model(avg_model, sound, avg_feats, encoder)
        return pd.DataFrame([{
            "Type": type_label,
            "Duration": duration,
            "Num_downloads": NUM_DOWNLOADS_MAP.get(num, str(num)),
            "Avg_rating": avg,
        }])

    # UI boundary: report any failure in the output table instead of raising
    # to the caller.
    except Exception as e:
        return pd.DataFrame([{"Erreur": str(e)}])

# ----------------------------
# 4️⃣ Interface Gradio
# ----------------------------
# Single-page UI: a URL textbox, a button, and a table for the result.
with gr.Blocks() as demo:
    gr.Markdown(value="## 🎧 FreeSound – Prédiction XGBoost (DMatrix)")
    url = gr.Textbox(
        label="URL FreeSound",
        placeholder="https://freesound.org/s/123456/",
    )
    btn = gr.Button(value="Prédire")
    out = gr.Dataframe()
    # On click, the textbox content is passed to extract_and_predict and the
    # DataFrame it returns is shown in `out`.
    btn.click(fn=extract_and_predict, inputs=url, outputs=out)

demo.launch()