Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- .gitignore +1 -3
- app.py +70 -43
- scripts/register_model_to_hf.py +185 -0
.gitignore
CHANGED
|
@@ -36,11 +36,9 @@ secrets.json
|
|
| 36 |
data/raw/ # Pour datasets volumineux en data science (OC_P5)
|
| 37 |
notebooks/*.ipynb_checkpoints/
|
| 38 |
|
| 39 |
-
# MLflow
|
| 40 |
-
mlflow.db
|
| 41 |
mlflow.db-shm
|
| 42 |
mlflow.db-wal
|
| 43 |
-
mlruns/
|
| 44 |
mlflow_ui.log
|
| 45 |
mlflow_comparison.png
|
| 46 |
nohup.out
|
|
|
|
| 36 |
data/raw/ # Pour datasets volumineux en data science (OC_P5)
|
| 37 |
notebooks/*.ipynb_checkpoints/
|
| 38 |
|
| 39 |
+
# MLflow (logs seulement, garder DB et runs pour déploiement HF)
|
|
|
|
| 40 |
mlflow.db-shm
|
| 41 |
mlflow.db-wal
|
|
|
|
| 42 |
mlflow_ui.log
|
| 43 |
mlflow_comparison.png
|
| 44 |
nohup.out
|
app.py
CHANGED
|
@@ -8,42 +8,61 @@ Version de démonstration - Interface complète en développement.
|
|
| 8 |
import gradio as gr
|
| 9 |
import mlflow
|
| 10 |
import mlflow.sklearn
|
|
|
|
|
|
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
# Charger le modèle le plus récent
|
| 17 |
-
MODEL_URI = "models:/Employee_Turnover_Model/latest"
|
| 18 |
-
# Fallback: utiliser un run_id spécifique si le modèle n'est pas enregistré
|
| 19 |
-
FALLBACK_RUN_ID = "2dd66b2b125646e19cf123c6944c9185"
|
| 20 |
|
| 21 |
|
| 22 |
def load_model():
|
| 23 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
try:
|
| 25 |
-
model
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
except Exception as e:
|
| 29 |
-
print(f"⚠️
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
try:
|
|
|
|
| 31 |
model = mlflow.sklearn.load_model(f"runs:/{FALLBACK_RUN_ID}/model")
|
| 32 |
-
print(f"✅ Modèle chargé depuis
|
| 33 |
-
return model
|
| 34 |
except Exception as e2:
|
| 35 |
-
print(f"❌ Erreur
|
| 36 |
-
return None
|
| 37 |
|
| 38 |
|
| 39 |
# Charger le modèle au démarrage
|
| 40 |
try:
|
| 41 |
-
model = load_model()
|
| 42 |
MODEL_LOADED = model is not None
|
| 43 |
except Exception as e:
|
| 44 |
print(f"❌ Erreur lors du chargement du modèle: {e}")
|
| 45 |
MODEL_LOADED = False
|
| 46 |
model = None
|
|
|
|
| 47 |
|
| 48 |
|
| 49 |
def get_model_info():
|
|
@@ -51,35 +70,43 @@ def get_model_info():
|
|
| 51 |
if not MODEL_LOADED:
|
| 52 |
return {
|
| 53 |
"status": "❌ Modèle non disponible",
|
| 54 |
-
"error": "Le modèle n'a pas pu être chargé
|
| 55 |
-
"solution": "Vérifiez que
|
| 56 |
}
|
| 57 |
|
| 58 |
try:
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
"
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
except Exception as e:
|
| 85 |
return {"status": "✅ Modèle chargé (info limitées)", "error": str(e)}
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
import mlflow
|
| 10 |
import mlflow.sklearn
|
| 11 |
+
from huggingface_hub import hf_hub_download
|
| 12 |
+
from pathlib import Path
|
| 13 |
|
| 14 |
+
# Configuration
|
| 15 |
+
HF_MODEL_REPO = "ASI-Engineer/employee-turnover-model"
|
| 16 |
+
FALLBACK_RUN_ID = "40e43c8e425345bab3d19f27eb8fe5d8"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def load_model():
    """Load the turnover model, trying each known source in priority order.

    Sources, in order:
      1. Hugging Face Hub repository ``HF_MODEL_REPO`` (production).
      2. MLflow Model Registry in the local ``sqlite:///mlflow.db`` store.
      3. The MLflow run ``FALLBACK_RUN_ID`` in the same local store.

    Every failure is printed and the next source is tried; this function
    does not raise for a source that is merely unavailable.

    Returns:
        tuple: ``(model, source)`` where ``source`` is ``"HF Hub"``,
        ``"MLflow Registry"`` or ``"MLflow Local"``; ``(None, "Error")``
        when no source could provide a model.
    """
    # 1. Hugging Face Hub (production)
    try:
        # mlflow.sklearn.load_model() reads the "MLmodel" descriptor that sits
        # next to the pickle, so the descriptor must be fetched as well;
        # downloading only model.pkl leaves an unloadable directory.
        hf_hub_download(
            repo_id=HF_MODEL_REPO, filename="model/MLmodel", repo_type="model"
        )
        model_path = hf_hub_download(
            repo_id=HF_MODEL_REPO, filename="model/model.pkl", repo_type="model"
        )
        model = mlflow.sklearn.load_model(str(Path(model_path).parent))
        print(f"✅ Modèle chargé depuis HF Hub: {HF_MODEL_REPO}")
        return model, "HF Hub"
    except Exception as e:
        print(f"⚠️ HF Hub non disponible: {e}")

    # 2. Local MLflow store (development)
    mlflow.set_tracking_uri("sqlite:///mlflow.db")

    # The historical registry name is tried first, then the default name used
    # by scripts/register_model_to_hf.py, so either registration is found.
    for registry_name in ("XGBoost_Employee_Turnover", "Employee_Turnover_XGBoost"):
        try:
            model = mlflow.sklearn.load_model(f"models:/{registry_name}/latest")
            print("✅ Modèle chargé depuis MLflow Model Registry")
            return model, "MLflow Registry"
        except Exception as e:
            print(f"⚠️ MLflow Model Registry ({registry_name}) non disponible: {e}")

    # 3. Last resort: a pinned run id
    try:
        model = mlflow.sklearn.load_model(f"runs:/{FALLBACK_RUN_ID}/model")
        print(f"✅ Modèle chargé depuis MLflow run: {FALLBACK_RUN_ID}")
        return model, "MLflow Local"
    except Exception as e2:
        print(f"❌ Erreur chargement MLflow: {e2}")
        return None, "Error"
|
| 55 |
|
| 56 |
|
| 57 |
# Charger le modèle au démarrage
|
| 58 |
# Load the model once at import time. The three globals start in the
# "failed" state and are only overwritten when load_model() returns normally.
model = None
model_source = "Error"
MODEL_LOADED = False
try:
    model, model_source = load_model()
except Exception as e:
    print(f"❌ Erreur lors du chargement du modèle: {e}")
else:
    # load_model() signals "nothing found" with a None model.
    MODEL_LOADED = model is not None
|
| 66 |
|
| 67 |
|
| 68 |
def get_model_info():
    """Return a dictionary describing the currently loaded model.

    Reads the module-level ``MODEL_LOADED``, ``model`` and ``model_source``
    set at startup.

    Returns:
        dict: One of
        - an "unavailable" payload (``status``, ``error``, ``solution``) when
          no model was loaded;
        - the full description (``status``, ``source``, ``model_type``, ...),
          plus ``run_id`` / ``f1_score`` / ``accuracy`` when the model came
          from the pinned local MLflow run;
        - a reduced payload (``status``, ``error``) if building the
          description raised.
    """
    if not MODEL_LOADED:
        return {
            "status": "❌ Modèle non disponible",
            "error": "Le modèle n'a pas pu être chargé",
            "solution": "Vérifiez que le modèle est bien enregistré sur HF Hub ou entraîné localement",
        }

    try:
        info = {
            "status": "✅ Modèle chargé avec succès",
            "source": model_source,
            "model_type": type(model).__name__,
            "features": "~50 features (après preprocessing)",
            "algorithme": "XGBoost + SMOTE",
            "hf_hub_repo": HF_MODEL_REPO if model_source == "HF Hub" else "N/A",
        }

        if model_source == "MLflow Local":
            # The model was loaded from FALLBACK_RUN_ID, so report that run's
            # metrics rather than those of whichever run happens to be the
            # most recent one in a hard-coded experiment.
            mlflow.set_tracking_uri("sqlite:///mlflow.db")
            client = mlflow.MlflowClient()
            run = client.get_run(FALLBACK_RUN_ID)
            metrics = run.data.metrics
            info.update(
                {
                    "run_id": run.info.run_id[:8],
                    "f1_score": f"{metrics.get('f1_score', 0):.4f}",
                    "accuracy": f"{metrics.get('accuracy', 0):.4f}",
                }
            )

        info["info"] = "Interface de prédiction en développement - API FastAPI à venir"
        return info

    except Exception as e:
        # Single handler: the model is usable even if its metadata is not.
        return {"status": "✅ Modèle chargé (info limitées)", "error": str(e)}
|
scripts/register_model_to_hf.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Script pour enregistrer le modèle MLflow dans Hugging Face Hub.
|
| 4 |
+
|
| 5 |
+
Usage:
|
| 6 |
+
python scripts/register_model_to_hf.py
|
| 7 |
+
|
| 8 |
+
Prérequis:
|
| 9 |
+
- HF_TOKEN configuré dans l'environnement ou .env
|
| 10 |
+
- Modèle entraîné dans MLflow
|
| 11 |
+
"""
|
| 12 |
+
import os
|
| 13 |
+
import shutil
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
import mlflow
|
| 17 |
+
import mlflow.sklearn
|
| 18 |
+
from huggingface_hub import HfApi, login
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def register_model_to_hf(
    run_id: str,
    hf_repo_id: str = "ASI-Engineer/employee-turnover-model",
    model_name: str = "Employee_Turnover_XGBoost",
):
    """Register an MLflow run's model in the registry and publish it to HF Hub.

    Steps: register ``runs:/<run_id>/model`` under ``model_name`` in the local
    ``sqlite:///mlflow.db`` store, load the model, export it in MLflow format
    with a model card into ``./model_export``, then upload that folder to
    ``hf_repo_id``. The export folder is removed on every exit path.

    Args:
        run_id: MLflow run id that holds the model to publish.
        hf_repo_id: Target Hugging Face repository (``username/repo-name``).
        model_name: Name to register the model under in the MLflow registry.

    Returns:
        bool: ``True`` when the upload succeeded; ``False`` when ``HF_TOKEN``
        is missing or the authentication/upload raised.

    Raises:
        Exception: Whatever MLflow raises if the model cannot be loaded from
        the run or exported; these are not caught here.
    """
    print("=" * 80)
    print("🚀 ENREGISTREMENT DU MODÈLE DANS HUGGING FACE HUB")
    print("=" * 80)
    print()

    mlflow.set_tracking_uri("sqlite:///mlflow.db")

    # 1. Register in the MLflow Model Registry (best effort).
    print("📦 Étape 1: Enregistrement dans MLflow Model Registry...")
    model_uri = f"runs:/{run_id}/model"

    try:
        model_version = mlflow.register_model(model_uri, model_name)
        print(f" ✅ Modèle enregistré: {model_name} version {model_version.version}")
        print(f" 📍 Run ID: {run_id}")
    except Exception as e:
        print(f" ℹ️ Modèle déjà enregistré ou erreur: {e}")
        model_version = None

    print()

    # 2. Load the model that belongs to *this* run.
    print("📥 Étape 2: Chargement du modèle depuis MLflow...")
    model = None
    if model_version is not None:
        # Pin the version just created: "latest" could resolve to a model from
        # another run, which would contradict the run id written in the card.
        try:
            model = mlflow.sklearn.load_model(
                f"models:/{model_name}/{model_version.version}"
            )
            print(f" ✅ Modèle chargé depuis Model Registry: {model_name}")
        except Exception as e:
            print(f" ⚠️ Chargement depuis Model Registry impossible: {e}")
    if model is None:
        model = mlflow.sklearn.load_model(model_uri)
        print(f" ✅ Modèle chargé depuis run: {run_id[:8]}")
    print(f" 📦 Type: {type(model).__name__}")
    print()

    export_dir = Path("./model_export")
    # A previous failed run may have left the folder behind, and
    # mlflow.sklearn.save_model() refuses an existing non-empty target path.
    shutil.rmtree(export_dir, ignore_errors=True)

    try:
        # 3. Export to a temporary folder.
        print("💾 Étape 3: Export du modèle...")
        export_dir.mkdir(exist_ok=True)
        mlflow.sklearn.save_model(model, str(export_dir / "model"))

        with open(export_dir / "README.md", "w", encoding="utf-8") as f:
            f.write(_build_model_card(run_id, hf_repo_id))

        print(f" ✅ Modèle exporté vers: {export_dir}")
        print()

        # 4. Upload to Hugging Face Hub.
        print("☁️ Étape 4: Upload vers Hugging Face Hub...")

        hf_token = os.getenv("HF_TOKEN")
        if not hf_token:
            print(" ⚠️ HF_TOKEN non trouvé dans l'environnement")
            print(
                " 💡 Conseil: Créez un token sur https://huggingface.co/settings/tokens"
            )
            print(" 💡 Puis: export HF_TOKEN='your_token_here'")
            return False

        try:
            login(token=hf_token, add_to_git_credential=False)
            print(" ✅ Authentification Hugging Face réussie")

            api = HfApi()
            api.create_repo(
                repo_id=hf_repo_id, repo_type="model", exist_ok=True, private=False
            )
            api.upload_folder(
                repo_id=hf_repo_id,
                folder_path=str(export_dir),
                repo_type="model",
            )
        except Exception as e:
            print(f" ❌ Erreur lors de l'upload: {e}")
            return False

        print(f" ✅ Modèle uploadé vers: https://huggingface.co/{hf_repo_id}")
        print()
    finally:
        # Clean up on success *and* failure so the next run starts fresh.
        if export_dir.exists():
            shutil.rmtree(export_dir, ignore_errors=True)
            print(" 🧹 Dossier temporaire nettoyé")

    # Reached only after a successful upload, so the banner is truthful.
    print()
    print("=" * 80)
    print("✅ ENREGISTREMENT TERMINÉ")
    print("=" * 80)
    print()
    print(f"🔗 Modèle disponible sur: https://huggingface.co/{hf_repo_id}")
    print("📝 Pour utiliser dans app.py:")
    print(f' local_dir = snapshot_download(repo_id="{hf_repo_id}")')
    print(' model = mlflow.sklearn.load_model(str(Path(local_dir) / "model"))')
    return True


def _build_model_card(run_id: str, hf_repo_id: str) -> str:
    """Return the README.md (model card) text published with the model."""
    return f"""---
tags:
- employee-turnover
- xgboost
- mlflow
- classification
library_name: scikit-learn
---

# Employee Turnover Prediction Model

Modèle XGBoost pour prédire le turnover des employés.

## Métriques
- **F1-Score**: Optimisé pour classes déséquilibrées
- **Algorithme**: XGBoost avec SMOTE
- **MLflow Run ID**: `{run_id}`

## Utilisation

```python
from pathlib import Path

import mlflow.sklearn
from huggingface_hub import snapshot_download

# Charger depuis Hugging Face Hub
local_dir = snapshot_download(repo_id="{hf_repo_id}")
model = mlflow.sklearn.load_model(str(Path(local_dir) / "model"))

# Prédiction
predictions = model.predict(X)
```

## Preprocessing
Les artifacts de preprocessing (scaler, encoders) sont disponibles dans MLflow.

## Repository
[GitHub - OC_P5](https://github.com/chaton59/OC_P5)
"""
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
if __name__ == "__main__":
    # Run to publish: the best run (most recent one that logged a model).
    RUN_ID = "40e43c8e425345bab3d19f27eb8fe5d8"

    success = register_model_to_hf(
        run_id=RUN_ID, hf_repo_id="ASI-Engineer/employee-turnover-model"
    )

    if not success:
        print("\n⚠️ Enregistrement incomplet. Vérifiez HF_TOKEN.")
        # exit() is a helper injected by the `site` module for interactive
        # use; SystemExit gives the same exit status without relying on it.
        raise SystemExit(1)
|