flodussart committed on
Commit
c47e8f0
·
1 Parent(s): 10532d1
app.py CHANGED
@@ -1,200 +1,431 @@
1
- # import os
2
- # import mlflow
3
- # import pandas as pd
4
- # from fastapi import FastAPI, HTTPException
5
- # from pydantic import BaseModel
6
- # import uvicorn
7
- # from dotenv import load_dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
 
9
 
10
- # # --------------------------
11
- # # 📘 FastAPI Metadata
12
- # # --------------------------
13
- # description = """
14
- # # 🚗 Getaround API Documentation
15
 
16
- # Bienvenue sur l'API Getaround ! Cette application permet d’analyser les retards de retour de véhicules et d’optimiser la tarification via Machine Learning.
 
 
 
 
 
17
 
18
- # ## 🔗 Dashboard interactif
19
- # 👉 [Dashboard Streamlit](https://flodussart-getaround-streamlit.hf.space)
20
 
21
- # ## 🤖 Endpoint ML : `/predict`
22
- # - Entrée : liste de caractéristiques numériques
23
- # - Sortie : prédiction du prix
 
 
 
 
 
 
 
 
 
24
 
25
- # Exemple :
26
- # ```json
27
- # {
28
- # "input": [[7.0, 0.27, 0.36, 20.7, 0.045, 45.0, 170.0, 1.001, 3.0, 0.45, 8.8]]
29
- # }
30
- # """
31
- # tags_metadata = [
32
- # {
33
- # "name": "Machine Learning",
34
- # "description": "Prédiction du prix de location via MLflow"
35
- # }
36
- # ]
37
 
38
- # load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # # Charger modèle MLflow
41
- # mlflow.set_tracking_uri(os.getenv("APP_URI"))
42
- # model = mlflow.pyfunc.load_model("models:/getaround-project-v4/1")
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  # app = FastAPI(
45
  # title="🚗 Getaround Pricing API",
46
- # description="API de prédiction du prix de location — modèle ML",
 
 
 
 
 
47
  # version="1.0",
48
- # docs_url="/docs"
 
49
  # )
50
 
51
- # class PredictionInput(BaseModel):
52
- # input: list[list[float]]
 
 
53
 
54
- # @app.post("/predict")
55
- # def predict(payload: PredictionInput):
56
- # try:
57
- # df = pd.DataFrame(payload.input, columns=[
58
- # 'model_key', 'mileage', 'engine_power', 'fuel', 'paint_color', 'car_type',
59
- # 'private_parking_available', 'has_gps', 'has_air_conditioning',
60
- # 'automatic_car', 'has_getaround_connect', 'has_speed_regulator', 'winter_tires'
61
- # ])
62
- # prediction = model.predict(df)
63
- # return {"prediction": prediction.tolist()}
64
- # except Exception as e:
65
- # raise HTTPException(status_code=500, detail=f"Erreur lors de la prédiction : {e}")
66
 
67
 
68
- # if __name__ == "__main__":
69
- # uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", 8080)))
70
- # # app = FastAPI(
71
- # # title="Getaround API",
72
- # # description=description,
73
- # # version="1.0",
74
- # # contact={
75
- # # "name": "Floriane Dussart",
76
- # # "email": "floriane.dussart@gmail.com",
77
- # # },
78
- # # openapi_tags=tags_metadata
79
- # # )
80
 
 
 
81
 
82
- # # # === Charger les variables d'environnement ===
83
- # # load_dotenv()
 
84
 
85
- # # # === Définir le port imposé par Hugging Face ===
86
- # # PORT = int(os.getenv("PORT", 8080))
 
 
 
 
 
87
 
88
- # # # === Configurer le serveur MLflow ===
89
- # # mlflow.set_tracking_uri(os.getenv("APP_URI") or "https://flodussart-getaroundproject.hf.space")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
- # # # === Charger le modèle MLflow ===
92
- # # try:
93
- # # model = mlflow.pyfunc.load_model("models:/getaround-project-v4/1")
94
- # # except Exception as e:
95
- # # raise RuntimeError(f"❌ Erreur de chargement du modèle MLflow : {e}")
96
 
97
- # # # === Création de l'application FastAPI ===
98
- # # app = FastAPI(
99
- # # title="🚗 Getaround Price API",
100
- # # description="Prédiction du prix journalier de location de voiture",
101
- # # version="1.0",
102
- # # docs_url="/docs",
103
- # # redoc_url="/redoc"
104
- # # )
 
 
 
105
 
106
- # # # === Schéma d’entrée utilisateur ===
107
- # # class PredictionInput(BaseModel):
108
- # # model_key: str
109
- # # mileage: float
110
- # # engine_power: float
111
- # # fuel: str
112
- # # paint_color: str
113
- # # car_type: str
114
- # # private_parking_available: bool
115
- # # has_gps: bool
116
- # # has_air_conditioning: bool
117
- # # automatic_car: bool
118
- # # has_getaround_connect: bool
119
- # # has_speed_regulator: bool
120
- # # winter_tires: bool
121
-
122
- # # model_config = {"protected_namespaces": ()}
123
-
124
- # # @field_validator("paint_color")
125
- # # @classmethod
126
- # # def validate_paint_color(cls, v):
127
- # # allowed = ['black', 'grey', 'white', 'green', 'red', 'silver', 'blue', 'beige', 'brown', 'other']
128
- # # return v if v in allowed else "other"
129
-
130
- # # @field_validator("fuel")
131
- # # @classmethod
132
- # # def validate_fuel(cls, v):
133
- # # allowed = ['diesel', 'petrol', 'other']
134
- # # return v if v in allowed else "other"
135
-
136
- # # @field_validator("model_key")
137
- # # @classmethod
138
- # # def validate_model_key(cls, v):
139
- # # allowed = ['Citroën','Peugeot','PGO','Renault','Audi','BMW','Mercedes','Opel',
140
- # # 'Volkswagen','Ferrari','Mitsubishi','Nissan','SEAT','Maserati',
141
- # # 'Subaru','Toyota','Other']
142
- # # return v if v in allowed else "Other"
143
-
144
- # # @field_validator("car_type")
145
- # # @classmethod
146
- # # def validate_car_type(cls, v):
147
- # # allowed = ['convertible','coupe','estate','hatchback','sedan','subcompact','suv','van']
148
- # # return v if v in allowed else "other"
149
-
150
- # # # === Page d’accueil ===
151
- # # @app.get("/")
152
- # # def welcome():
153
- # # return {"message": "Bienvenue sur l’API Getaround 🚗 — utilisez /predict pour estimer un prix."}
154
-
155
- # # # === Endpoint de prédiction ===
156
- # # @app.post("/predict")
157
- # # def predict(input_data: PredictionInput):
158
- # # try:
159
- # # df = pd.DataFrame([input_data.dict()])
160
- # # prediction = model.predict(df)
161
- # # return {
162
- # # "prediction": round(float(prediction[0]), 2),
163
- # # "input": input_data.dict()
164
- # # }
165
- # # except Exception as e:
166
- # # raise HTTPException(status_code=500, detail=f"Erreur interne : {e}")
167
 
168
- # # # === Optionnel : Exécution locale (inutile sur HF, mais pratique pour debug) ===
169
- # # if __name__ == "__main__":
170
- # # uvicorn.run("app:app", host="0.0.0.0", port=PORT)
171
 
172
  import os
173
  import json
174
  from pathlib import Path
175
- from typing import Optional, List
176
 
177
  import pandas as pd
178
- from fastapi import FastAPI, HTTPException
179
- from pydantic import BaseModel, Field
180
- import uvicorn
181
  import mlflow.pyfunc
 
 
 
 
 
182
 
183
 
184
  # =======================
185
  # Config
186
  # =======================
187
- PORT = int(os.getenv("PORT", 8080))
188
  LOCAL_MODEL_PATH = os.getenv("MODEL_PATH", "model_bundle/model")
189
 
190
 
191
  # =======================
192
  # Helpers
193
  # =======================
194
- def load_features_from_artifacts(model_dir: str) -> List[str]:
195
  """
196
  Essaie de lire artifacts/features_used.json (déposé lors du training).
197
- Si absent, fallback vers une liste cohérente avec le meilleur modèle signalé.
198
  """
199
  fp = Path(model_dir) / "artifacts" / "features_used.json"
200
  if fp.exists():
@@ -203,12 +434,12 @@ def load_features_from_artifacts(model_dir: str) -> List[str]:
203
  list(data.get("categorical", [])) + \
204
  list(data.get("boolean", []))
205
 
206
- # Fallback (conforme à ton features_used.json partagé)
207
  return [
208
  # numeric
209
  "mileage", "engine_power",
210
- # categorical
211
- "model_key", "fuel_grouped", "paint_color", "car_type_grouped",
212
  # boolean
213
  "private_parking_available", "has_gps", "has_air_conditioning",
214
  "automatic_car", "has_getaround_connect", "has_speed_regulator", "winter_tires",
@@ -216,7 +447,7 @@ def load_features_from_artifacts(model_dir: str) -> List[str]:
216
 
217
 
218
  # =======================
219
- # App + model loading
220
  # =======================
221
  app = FastAPI(
222
  title="🚗 Getaround Pricing API",
@@ -231,24 +462,51 @@ app = FastAPI(
231
  redoc_url="/redoc",
232
  )
233
 
 
 
 
 
 
 
 
 
 
 
234
  try:
235
  model = mlflow.pyfunc.load_model(LOCAL_MODEL_PATH)
236
  except Exception as e:
237
  raise RuntimeError(f"❌ Impossible de charger le modèle local '{LOCAL_MODEL_PATH}': {e}")
238
 
239
- FEATURES: List[str] = load_features_from_artifacts(LOCAL_MODEL_PATH)
240
 
241
 
242
  # =======================
243
- # Schemas (typés)
244
  # =======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  class PredictRow(BaseModel):
246
- # Types explicites = /docs plus fiable
247
  mileage: float
248
  engine_power: float
249
- model_key: str
250
  fuel_grouped: str
251
- paint_color: str
252
  car_type_grouped: str
253
  private_parking_available: bool
254
  has_gps: bool
@@ -258,15 +516,60 @@ class PredictRow(BaseModel):
258
  has_speed_regulator: bool
259
  winter_tires: bool
260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  class PredictPayload(BaseModel):
263
- # On accepte l’un OU l’autre format.
264
- rows: Optional[List[PredictRow]] = Field(
265
- default=None, description="Format recommandé : liste d'objets typés."
266
- )
267
- input: Optional[List[List[float]]] = Field(
 
 
268
  default=None,
269
- description=f"Format legacy : matrice. Chaque ligne doit suivre l'ordre strict: {FEATURES}",
270
  )
271
 
272
 
@@ -295,13 +598,15 @@ def build_df_from_payload(payload: PredictPayload) -> pd.DataFrame:
295
  Priorité au format 'rows' (typé). Sinon 'input' (ordre strict).
296
  """
297
  if payload.rows:
298
- df = pd.DataFrame([r.dict() for r in payload.rows])
 
299
  missing = [c for c in FEATURES if c not in df.columns]
300
  if missing:
301
  raise HTTPException(
302
  status_code=400,
303
  detail=f"Colonnes manquantes: {missing}. Attendu: {FEATURES}",
304
  )
 
305
  return df[FEATURES]
306
 
307
  if payload.input:
@@ -325,6 +630,7 @@ def predict(payload: PredictPayload):
325
  try:
326
  df = build_df_from_payload(payload)
327
  y_hat = model.predict(df)
 
328
  preds = [float(x) for x in (y_hat.tolist() if hasattr(y_hat, "tolist") else y_hat)]
329
  return {"prediction": preds}
330
  except HTTPException:
@@ -334,4 +640,6 @@ def predict(payload: PredictPayload):
334
 
335
 
336
  if __name__ == "__main__":
337
- uvicorn.run(app, host="0.0.0.0", port=4000)
 
 
 
1
+ # # import os
2
+ # # import mlflow
3
+ # # import pandas as pd
4
+ # # from fastapi import FastAPI, HTTPException
5
+ # # from pydantic import BaseModel
6
+ # # import uvicorn
7
+ # # from dotenv import load_dotenv
8
+
9
+
10
+ # # # --------------------------
11
+ # # # 📘 FastAPI Metadata
12
+ # # # --------------------------
13
+ # # description = """
14
+ # # # 🚗 Getaround API Documentation
15
+
16
+ # # Bienvenue sur l'API Getaround ! Cette application permet d’analyser les retards de retour de véhicules et d’optimiser la tarification via Machine Learning.
17
+
18
+ # # ## 🔗 Dashboard interactif
19
+ # # 👉 [Dashboard Streamlit](https://flodussart-getaround-streamlit.hf.space)
20
+
21
+ # # ## 🤖 Endpoint ML : `/predict`
22
+ # # - Entrée : liste de caractéristiques numériques
23
+ # # - Sortie : prédiction du prix
24
+
25
+ # # Exemple :
26
+ # # ```json
27
+ # # {
28
+ # # "input": [[7.0, 0.27, 0.36, 20.7, 0.045, 45.0, 170.0, 1.001, 3.0, 0.45, 8.8]]
29
+ # # }
30
+ # # """
31
+ # # tags_metadata = [
32
+ # # {
33
+ # # "name": "Machine Learning",
34
+ # # "description": "Prédiction du prix de location via MLflow"
35
+ # # }
36
+ # # ]
37
 
38
+ # # load_dotenv()
39
 
40
+ # # # Charger modèle MLflow
41
+ # # mlflow.set_tracking_uri(os.getenv("APP_URI"))
42
+ # # model = mlflow.pyfunc.load_model("models:/getaround-project-v4/1")
 
 
43
 
44
+ # # app = FastAPI(
45
+ # # title="🚗 Getaround Pricing API",
46
+ # # description="API de prédiction du prix de location — modèle ML",
47
+ # # version="1.0",
48
+ # # docs_url="/docs"
49
+ # # )
50
 
51
+ # # class PredictionInput(BaseModel):
52
+ # # input: list[list[float]]
53
 
54
+ # # @app.post("/predict")
55
+ # # def predict(payload: PredictionInput):
56
+ # # try:
57
+ # # df = pd.DataFrame(payload.input, columns=[
58
+ # # 'model_key', 'mileage', 'engine_power', 'fuel', 'paint_color', 'car_type',
59
+ # # 'private_parking_available', 'has_gps', 'has_air_conditioning',
60
+ # # 'automatic_car', 'has_getaround_connect', 'has_speed_regulator', 'winter_tires'
61
+ # # ])
62
+ # # prediction = model.predict(df)
63
+ # # return {"prediction": prediction.tolist()}
64
+ # # except Exception as e:
65
+ # # raise HTTPException(status_code=500, detail=f"Erreur lors de la prédiction : {e}")
66
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
+ # # if __name__ == "__main__":
69
+ # # uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", 8080)))
70
+ # # # app = FastAPI(
71
+ # # # title="Getaround API",
72
+ # # # description=description,
73
+ # # # version="1.0",
74
+ # # # contact={
75
+ # # # "name": "Floriane Dussart",
76
+ # # # "email": "floriane.dussart@gmail.com",
77
+ # # # },
78
+ # # # openapi_tags=tags_metadata
79
+ # # # )
80
+
81
+
82
+ # # # # === Charger les variables d'environnement ===
83
+ # # # load_dotenv()
84
+
85
+ # # # # === Définir le port imposé par Hugging Face ===
86
+ # # # PORT = int(os.getenv("PORT", 8080))
87
+
88
+ # # # # === Configurer le serveur MLflow ===
89
+ # # # mlflow.set_tracking_uri(os.getenv("APP_URI") or "https://flodussart-getaroundproject.hf.space")
90
+
91
+ # # # # === Charger le modèle MLflow ===
92
+ # # # try:
93
+ # # # model = mlflow.pyfunc.load_model("models:/getaround-project-v4/1")
94
+ # # # except Exception as e:
95
+ # # # raise RuntimeError(f"❌ Erreur de chargement du modèle MLflow : {e}")
96
+
97
+ # # # # === Création de l'application FastAPI ===
98
+ # # # app = FastAPI(
99
+ # # # title="🚗 Getaround Price API",
100
+ # # # description="Prédiction du prix journalier de location de voiture",
101
+ # # # version="1.0",
102
+ # # # docs_url="/docs",
103
+ # # # redoc_url="/redoc"
104
+ # # # )
105
+
106
+ # # # # === Schéma d’entrée utilisateur ===
107
+ # # # class PredictionInput(BaseModel):
108
+ # # # model_key: str
109
+ # # # mileage: float
110
+ # # # engine_power: float
111
+ # # # fuel: str
112
+ # # # paint_color: str
113
+ # # # car_type: str
114
+ # # # private_parking_available: bool
115
+ # # # has_gps: bool
116
+ # # # has_air_conditioning: bool
117
+ # # # automatic_car: bool
118
+ # # # has_getaround_connect: bool
119
+ # # # has_speed_regulator: bool
120
+ # # # winter_tires: bool
121
+
122
+ # # # model_config = {"protected_namespaces": ()}
123
+
124
+ # # # @field_validator("paint_color")
125
+ # # # @classmethod
126
+ # # # def validate_paint_color(cls, v):
127
+ # # # allowed = ['black', 'grey', 'white', 'green', 'red', 'silver', 'blue', 'beige', 'brown', 'other']
128
+ # # # return v if v in allowed else "other"
129
+
130
+ # # # @field_validator("fuel")
131
+ # # # @classmethod
132
+ # # # def validate_fuel(cls, v):
133
+ # # # allowed = ['diesel', 'petrol', 'other']
134
+ # # # return v if v in allowed else "other"
135
+
136
+ # # # @field_validator("model_key")
137
+ # # # @classmethod
138
+ # # # def validate_model_key(cls, v):
139
+ # # # allowed = ['Citroën','Peugeot','PGO','Renault','Audi','BMW','Mercedes','Opel',
140
+ # # # 'Volkswagen','Ferrari','Mitsubishi','Nissan','SEAT','Maserati',
141
+ # # # 'Subaru','Toyota','Other']
142
+ # # # return v if v in allowed else "Other"
143
+
144
+ # # # @field_validator("car_type")
145
+ # # # @classmethod
146
+ # # # def validate_car_type(cls, v):
147
+ # # # allowed = ['convertible','coupe','estate','hatchback','sedan','subcompact','suv','van']
148
+ # # # return v if v in allowed else "other"
149
+
150
+ # # # # === Page d’accueil ===
151
+ # # # @app.get("/")
152
+ # # # def welcome():
153
+ # # # return {"message": "Bienvenue sur l’API Getaround 🚗 — utilisez /predict pour estimer un prix."}
154
+
155
+ # # # # === Endpoint de prédiction ===
156
+ # # # @app.post("/predict")
157
+ # # # def predict(input_data: PredictionInput):
158
+ # # # try:
159
+ # # # df = pd.DataFrame([input_data.dict()])
160
+ # # # prediction = model.predict(df)
161
+ # # # return {
162
+ # # # "prediction": round(float(prediction[0]), 2),
163
+ # # # "input": input_data.dict()
164
+ # # # }
165
+ # # # except Exception as e:
166
+ # # # raise HTTPException(status_code=500, detail=f"Erreur interne : {e}")
167
+
168
+ # # # # === Optionnel : Exécution locale (inutile sur HF, mais pratique pour debug) ===
169
+ # # # if __name__ == "__main__":
170
+ # # # uvicorn.run("app:app", host="0.0.0.0", port=PORT)
171
 
172
+ # import os
173
+ # import json
174
+ # from pathlib import Path
175
+ # from typing import Optional, List
176
 
177
+ # import pandas as pd
178
+ # from fastapi import FastAPI, HTTPException
179
+ # from pydantic import BaseModel, Field
180
+ # import uvicorn
181
+ # import mlflow.pyfunc
182
+
183
+
184
+ # # =======================
185
+ # # Config
186
+ # # =======================
187
+ # PORT = int(os.getenv("PORT", 8080))
188
+ # LOCAL_MODEL_PATH = os.getenv("MODEL_PATH", "model_bundle/model")
189
+
190
+
191
+ # # =======================
192
+ # # Helpers
193
+ # # =======================
194
+ # def load_features_from_artifacts(model_dir: str) -> List[str]:
195
+ # """
196
+ # Essaie de lire artifacts/features_used.json (déposé lors du training).
197
+ # Si absent, fallback vers une liste cohérente avec le meilleur modèle signalé.
198
+ # """
199
+ # fp = Path(model_dir) / "artifacts" / "features_used.json"
200
+ # if fp.exists():
201
+ # data = json.loads(fp.read_text())
202
+ # return list(data.get("numeric", [])) + \
203
+ # list(data.get("categorical", [])) + \
204
+ # list(data.get("boolean", []))
205
+
206
+ # # Fallback (conforme à ton features_used.json partagé)
207
+ # return [
208
+ # # numeric
209
+ # "mileage", "engine_power",
210
+ # # categorical
211
+ # "model_key", "fuel_grouped", "paint_color", "car_type_grouped",
212
+ # # boolean
213
+ # "private_parking_available", "has_gps", "has_air_conditioning",
214
+ # "automatic_car", "has_getaround_connect", "has_speed_regulator", "winter_tires",
215
+ # ]
216
+
217
+
218
+ # # =======================
219
+ # # App + model loading
220
+ # # =======================
221
  # app = FastAPI(
222
  # title="🚗 Getaround Pricing API",
223
+ # description=(
224
+ # "Prédiction du prix journalier de location.\n\n"
225
+ # "• Dashboard : https://flodussart-getaround-streamlit.hf.space\n"
226
+ # "• Endpoint ML : POST /predict — body: {\"rows\": [...] } (recommandé) "
227
+ # "ou {\"input\": [[...], ...]} (ordre strict des features).\n"
228
+ # ),
229
  # version="1.0",
230
+ # docs_url="/docs",
231
+ # redoc_url="/redoc",
232
  # )
233
 
234
+ # try:
235
+ # model = mlflow.pyfunc.load_model(LOCAL_MODEL_PATH)
236
+ # except Exception as e:
237
+ # raise RuntimeError(f"❌ Impossible de charger le modèle local '{LOCAL_MODEL_PATH}': {e}")
238
 
239
+ # FEATURES: List[str] = load_features_from_artifacts(LOCAL_MODEL_PATH)
 
 
 
 
 
 
 
 
 
 
 
240
 
241
 
242
+ # # =======================
243
+ # # Schemas (typés)
244
+ # # =======================
 
 
 
 
 
 
 
 
 
245
 
246
+ # from pydantic import BaseModel, validator
247
+ # from unidecode import unidecode
248
 
249
+ # ALLOWED_FUEL = {"diesel", "petrol", "other"}
250
+ # ALLOWED_PAINT = {"black", "grey", "blue", "white", "brown", "silver", "red", "beige", "green", "orange"}
251
+ # ALLOWED_CARTYPE = {"estate", "sedan", "suv", "hatchback", "other"}
252
 
253
+ # # mets ici les modèles fréquents ; sinon ça tombera sur "other"
254
+ # KNOWN_MODELS = {
255
+ # "citroen","renault","bmw","peugeot","audi","nissan","mitsubishi","mercedes",
256
+ # "volkswagen","toyota","seat","subaru","pgo","opel","ferrari","maserati",
257
+ # "suzuki","ford","porsche","alfa romeo","kia motors","fiat","lamborghini",
258
+ # "lexus","honda","mazda","yamaha"
259
+ # }
260
 
261
+ # STRICT = False # True pour rejeter les valeurs inconnues (400)
262
+
263
+ # def _norm(x: str) -> str:
264
+ # return unidecode(str(x)).strip().lower() # "Citroën" -> "citroen"
265
+
266
+ # class PredictRow(BaseModel):
267
+ # mileage: float
268
+ # engine_power: float
269
+ # model_key: str
270
+ # fuel_grouped: str
271
+ # paint_color: str
272
+ # car_type_grouped: str
273
+ # private_parking_available: bool
274
+ # has_gps: bool
275
+ # has_air_conditioning: bool
276
+ # automatic_car: bool
277
+ # has_getaround_connect: bool
278
+ # has_speed_regulator: bool
279
+ # winter_tires: bool
280
+
281
+ # @validator("model_key", "fuel_grouped", "paint_color", "car_type_grouped", pre=True)
282
+ # def _normalize(cls, v): return _norm(v)
283
+
284
+ # @validator("fuel_grouped")
285
+ # def _fuel(cls, v):
286
+ # if v in ALLOWED_FUEL: return v
287
+ # if STRICT: raise ValueError(f"fuel_grouped in {sorted(ALLOWED_FUEL)}")
288
+ # return "other"
289
+
290
+ # @validator("paint_color")
291
+ # def _paint(cls, v):
292
+ # if v in ALLOWED_PAINT: return v
293
+ # if STRICT: raise ValueError(f"paint_color in {sorted(ALLOWED_PAINT)}")
294
+ # return "other"
295
+
296
+ # @validator("car_type_grouped")
297
+ # def _ctype(cls, v):
298
+ # if v in ALLOWED_CARTYPE: return v
299
+ # if STRICT: raise ValueError(f"car_type_grouped in {sorted(ALLOWED_CARTYPE)}")
300
+ # return "other"
301
+
302
+ # @validator("model_key")
303
+ # def _model(cls, v):
304
+ # if v in KNOWN_MODELS: return v
305
+ # return "other" if not STRICT else (_ for _ in ()).throw(ValueError("unknown model_key"))
306
+
307
+ # class PredictRow(BaseModel):
308
+ # # Types explicites = /docs plus fiable ✨
309
+ # mileage: float
310
+ # engine_power: float
311
+ # model_key: str
312
+ # fuel_grouped: str
313
+ # paint_color: str
314
+ # car_type_grouped: str
315
+ # private_parking_available: bool
316
+ # has_gps: bool
317
+ # has_air_conditioning: bool
318
+ # automatic_car: bool
319
+ # has_getaround_connect: bool
320
+ # has_speed_regulator: bool
321
+ # winter_tires: bool
322
+
323
+
324
+ # class PredictPayload(BaseModel):
325
+ # # On accepte l’un OU l’autre format.
326
+ # rows: Optional[List[PredictRow]] = Field(
327
+ # default=None, description="Format recommandé : liste d'objets typés."
328
+ # )
329
+ # input: Optional[List[List[float]]] = Field(
330
+ # default=None,
331
+ # description=f"Format legacy : matrice. Chaque ligne doit suivre l'ordre strict: {FEATURES}",
332
+ # )
333
+
334
+
335
+ # # =======================
336
+ # # Routes
337
+ # # =======================
338
+ # @app.get("/")
339
+ # def root():
340
+ # return {
341
+ # "message": "Bienvenue sur l’API Getaround 🚗 — utilisez POST /predict",
342
+ # "docs": "/docs",
343
+ # "dashboard": "https://flodussart-getaround-streamlit.hf.space",
344
+ # "model_path": LOCAL_MODEL_PATH,
345
+ # "features": FEATURES,
346
+ # }
347
+
348
+
349
+ # @app.get("/healthz")
350
+ # def healthz():
351
+ # return {"status": "ok", "features": FEATURES}
352
+
353
+
354
+ # def build_df_from_payload(payload: PredictPayload) -> pd.DataFrame:
355
+ # """
356
+ # Construit le DataFrame d'entrée à partir du payload.
357
+ # Priorité au format 'rows' (typé). Sinon 'input' (ordre strict).
358
+ # """
359
+ # if payload.rows:
360
+ # df = pd.DataFrame([r.dict() for r in payload.rows])
361
+ # missing = [c for c in FEATURES if c not in df.columns]
362
+ # if missing:
363
+ # raise HTTPException(
364
+ # status_code=400,
365
+ # detail=f"Colonnes manquantes: {missing}. Attendu: {FEATURES}",
366
+ # )
367
+ # return df[FEATURES]
368
+
369
+ # if payload.input:
370
+ # n_feat = len(FEATURES)
371
+ # bad = [i for i, row in enumerate(payload.input) if len(row) != n_feat]
372
+ # if bad:
373
+ # raise HTTPException(
374
+ # status_code=400,
375
+ # detail=f"Lignes {bad} n'ont pas {n_feat} valeurs. Ordre attendu: {FEATURES}",
376
+ # )
377
+ # return pd.DataFrame(payload.input, columns=FEATURES)
378
+
379
+ # raise HTTPException(
380
+ # status_code=400,
381
+ # detail="Fournis soit 'rows': [{feature: value}], soit 'input': [[...]].",
382
+ # )
383
 
 
 
 
 
 
384
 
385
+ # @app.post("/predict")
386
+ # def predict(payload: PredictPayload):
387
+ # try:
388
+ # df = build_df_from_payload(payload)
389
+ # y_hat = model.predict(df)
390
+ # preds = [float(x) for x in (y_hat.tolist() if hasattr(y_hat, "tolist") else y_hat)]
391
+ # return {"prediction": preds}
392
+ # except HTTPException:
393
+ # raise
394
+ # except Exception as e:
395
+ # raise HTTPException(status_code=500, detail=f"Erreur lors de la prédiction : {e}")
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
+ # if __name__ == "__main__":
399
+ # uvicorn.run(app, host="0.0.0.0", port=4000)
 
400
 
401
  import os
402
  import json
403
  from pathlib import Path
404
+ from typing import Optional, Any
405
 
406
  import pandas as pd
 
 
 
407
  import mlflow.pyfunc
408
+ import uvicorn
409
+ from fastapi import FastAPI, HTTPException
410
+ from fastapi.middleware.cors import CORSMiddleware
411
+ from pydantic import BaseModel, Field, field_validator
412
+ from unidecode import unidecode
413
 
414
 
415
  # =======================
416
  # Config
417
  # =======================
418
+ PORT = int(os.getenv("PORT", 7860)) # HF fournit $PORT; 7860 par défaut
419
  LOCAL_MODEL_PATH = os.getenv("MODEL_PATH", "model_bundle/model")
420
 
421
 
422
  # =======================
423
  # Helpers
424
  # =======================
425
+ def load_features_from_artifacts(model_dir: str) -> list[str]:
426
  """
427
  Essaie de lire artifacts/features_used.json (déposé lors du training).
428
+ Fallback : colonnes cohérentes avec la version 'grouped' (alignée API).
429
  """
430
  fp = Path(model_dir) / "artifacts" / "features_used.json"
431
  if fp.exists():
 
434
  list(data.get("categorical", [])) + \
435
  list(data.get("boolean", []))
436
 
437
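+ # Fallback used when artifacts/features_used.json is absent. NOTE(review): the list below names "model_key", "paint_color" and "car_type", while PredictRow declares model_key_grouped, paint_color_grouped and car_type_grouped — with this fallback the 'rows' format would fail the missing-column check (HTTP 400); confirm the intended column names.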
438
  return [
439
  # numeric
440
  "mileage", "engine_power",
441
+ # categorical (grouped/normalisées)
442
+ "model_key", "fuel_grouped", "paint_color", "car_type",
443
  # boolean
444
  "private_parking_available", "has_gps", "has_air_conditioning",
445
  "automatic_car", "has_getaround_connect", "has_speed_regulator", "winter_tires",
 
447
 
448
 
449
  # =======================
450
+ # App
451
  # =======================
452
  app = FastAPI(
453
  title="🚗 Getaround Pricing API",
 
462
  redoc_url="/redoc",
463
  )
464
 
465
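+ # CORS (useful when the Streamlit app calls the API directly from the browser). NOTE(review): allow_origins=["*"] is combined with allow_credentials=True below; credentialed CORS requires explicit origins — confirm and list the exact domain(s) if credentials are needed.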
466
+ app.add_middleware(
467
+ CORSMiddleware,
468
+ allow_origins=["*"], # ou mets ton domaine exact si tu préfères
469
+ allow_credentials=True,
470
+ allow_methods=["*"],
471
+ allow_headers=["*"],
472
+ )
473
+
474
+ # Charger le modèle MLflow (depuis le bundle local)
475
  try:
476
  model = mlflow.pyfunc.load_model(LOCAL_MODEL_PATH)
477
  except Exception as e:
478
  raise RuntimeError(f"❌ Impossible de charger le modèle local '{LOCAL_MODEL_PATH}': {e}")
479
 
480
+ FEATURES: list[str] = load_features_from_artifacts(LOCAL_MODEL_PATH)
481
 
482
 
483
  # =======================
484
+ # Pydantic v2 Schemas (+ normalisation)
485
  # =======================
486
+ ALLOWED_FUEL = {"diesel", "petrol", "other"}
487
+ ALLOWED_PAINT = {"black", "grey", "blue", "white", "brown", "silver", "red", "beige", "green", "orange", "other"}
488
+ ALLOWED_CARTYPE = {"estate", "sedan", "suv", "hatchback", "subcompact", "coupe", "convertible", "van", "other"}
489
+
490
+ KNOWN_MODELS = {
491
+ "citroen","renault","bmw","peugeot","audi","nissan","mitsubishi","mercedes",
492
+ "volkswagen","toyota","seat","subaru","pgo","opel","ferrari","maserati",
493
+ "suzuki","ford","porsche","alfa romeo","kia motors","fiat","lamborghini",
494
+ "lexus","honda","mazda","yamaha","other",
495
+ }
496
+
497
+ STRICT = False # True => rejette les valeurs inconnues (400)
498
+
499
+ def _norm(x: Any) -> str:
500
+ return unidecode(str(x)).strip().lower() # "Citroën" -> "citroen"
501
+
502
+
503
  class PredictRow(BaseModel):
504
+ # colonnes alignées avec le training (version grouped)
505
  mileage: float
506
  engine_power: float
507
+ model_key_grouped: str
508
  fuel_grouped: str
509
+ paint_color_grouped: str
510
  car_type_grouped: str
511
  private_parking_available: bool
512
  has_gps: bool
 
516
  has_speed_regulator: bool
517
  winter_tires: bool
518
 
519
+ # --- Normalisation en amont (before) ---
520
+ @field_validator("model_key_grouped", "fuel_grouped", "paint_color_grouped", "car_type_grouped", mode="before")
521
+ @classmethod
522
+ def _normalize(cls, v):
523
+ return _norm(v)
524
+
525
+ # --- Contrôles de domaine + fallback "other" ---
526
+ @field_validator("fuel_grouped")
527
+ @classmethod
528
+ def _fuel(cls, v: str):
529
+ if v in ALLOWED_FUEL:
530
+ return v
531
+ if STRICT:
532
+ raise ValueError(f"fuel_grouped must be in {sorted(ALLOWED_FUEL)}")
533
+ return "other"
534
+
535
+ @field_validator("paint_color_grouped")
536
+ @classmethod
537
+ def _paint(cls, v: str):
538
+ if v in ALLOWED_PAINT:
539
+ return v
540
+ if STRICT:
541
+ raise ValueError(f"paint_color_grouped must be in {sorted(ALLOWED_PAINT)}")
542
+ return "other"
543
+
544
+ @field_validator("car_type_grouped")
545
+ @classmethod
546
+ def _ctype(cls, v: str):
547
+ if v in ALLOWED_CARTYPE:
548
+ return v
549
+ if STRICT:
550
+ raise ValueError(f"car_type_grouped must be in {sorted(ALLOWED_CARTYPE)}")
551
+ return "other"
552
+
553
+ @field_validator("model_key_grouped")
554
+ @classmethod
555
+ def _model(cls, v: str):
556
+ if v in KNOWN_MODELS:
557
+ return v
558
+ if STRICT:
559
+ raise ValueError("unknown model_key_grouped")
560
+ return "other"
561
+
562
 
563
  class PredictPayload(BaseModel):
564
+ """
565
+ On accepte l’un OU l’autre format :
566
+ - rows : recommandé (objets typés)
567
+ - input : legacy (matrice) — l'ordre doit suivre FEATURES.
568
+ """
569
+ rows: Optional[list[PredictRow]] = Field(default=None)
570
+ input: Optional[list[list[Any]]] = Field(
571
  default=None,
572
+ description="Format legacy : matrice. Chaque ligne doit suivre l'ordre strict: {}".format(FEATURES),
573
  )
574
 
575
 
 
598
  Priorité au format 'rows' (typé). Sinon 'input' (ordre strict).
599
  """
600
  if payload.rows:
601
+ # Pydantic v2 -> .model_dump()
602
+ df = pd.DataFrame([r.model_dump() for r in payload.rows])
603
  missing = [c for c in FEATURES if c not in df.columns]
604
  if missing:
605
  raise HTTPException(
606
  status_code=400,
607
  detail=f"Colonnes manquantes: {missing}. Attendu: {FEATURES}",
608
  )
609
+ # Réordonne les colonnes exactement comme au training
610
  return df[FEATURES]
611
 
612
  if payload.input:
 
630
  try:
631
  df = build_df_from_payload(payload)
632
  y_hat = model.predict(df)
633
+ # numpy array -> python floats
634
  preds = [float(x) for x in (y_hat.tolist() if hasattr(y_hat, "tolist") else y_hat)]
635
  return {"prediction": preds}
636
  except HTTPException:
 
640
 
641
 
642
  if __name__ == "__main__":
643
+ # Uvicorn direct (utile en local). Sur HF, le CMD du Dockerfile lancera ça.
644
+ uvicorn.run("app:app", host="0.0.0.0", port=PORT)
645
+
model_bundle/model/MLmodel CHANGED
@@ -14,7 +14,7 @@ flavors:
14
  serialization_format: cloudpickle
15
  sklearn_version: 1.7.2
16
  mlflow_version: 2.9.2
17
- model_size_bytes: 1024217
18
- model_uuid: fbd87ce7587c4bc9af89a2cd1a641c46
19
- run_id: 31dc5fd4868d4e8b8eb76736b79b4f7f
20
- utc_time_created: '2025-10-26 10:52:57.335412'
 
14
  serialization_format: cloudpickle
15
  sklearn_version: 1.7.2
16
  mlflow_version: 2.9.2
17
+ model_size_bytes: 1006815
18
+ model_uuid: 514add3140a2447bb144ef25fdd35803
19
+ run_id: c525ab61c63348f48171b68ee556b6b5
20
+ utc_time_created: '2025-10-27 10:50:29.219164'
model_bundle/model/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59019bf878ec9fe6b25dc236d357788aa483ba4ecd6873931dea9d49322db680
3
- size 1024217
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a156e7ebf77ac9080867b389d6899fddad7e283984ab93fd8641eabc6cdbe73
3
+ size 1006815
requirements.txt CHANGED
@@ -1,12 +1,13 @@
1
- fastapi[standard]
2
- pydantic
3
  pandas
4
  scikit-learn==1.7.2
5
  mlflow==2.9.2
6
  lightgbm==4.6.0
7
  uvicorn
8
  python-dotenv
9
- fsspec
10
  s3fs
11
  boto3
12
- setuptools>=68,<72
 
 
1
+ fastapi[standard]>=0.103
2
+ pydantic>=2.4,<3
3
  pandas
4
  scikit-learn==1.7.2
5
  mlflow==2.9.2
6
  lightgbm==4.6.0
7
  uvicorn
8
  python-dotenv
9
+ fsspec
10
  s3fs
11
  boto3
12
+ setuptools>=68,<72
13
+ unidecode