File size: 5,831 Bytes
2c5ba72 0374656 2c5ba72 0374656 2c5ba72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import gradio as gr
import pandas as pd
from loguru import logger
from projet_05.branding import apply_brand_theme
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
# Locations of the artefacts produced by the offline training pipeline,
# resolved relative to the repository root (the expected working directory).
MODEL_PATH = Path("models/best_model.joblib")
METADATA_PATH = Path("models/best_model_meta.json")
SCHEMA_PATH = Path("data/processed/schema.json")
def _load_schema(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
return json.loads(path.read_text(encoding="utf-8"))
def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
if schema:
candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
if candidates:
return candidates
features = metadata.get("features", {})
explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
if explicit:
return explicit
if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
return list(pipeline.feature_names_in_)
return []
def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
if isinstance(payload, pd.DataFrame):
df = payload.copy()
elif payload is None:
df = pd.DataFrame(columns=headers)
else:
df = pd.DataFrame(payload, columns=headers if headers else None)
df = df.dropna(how="all")
if df.empty:
raise gr.Error("Merci de saisir au moins une ligne complète.")
return df
def _ensure_model():
    """Abort the request with a user-facing error when no pipeline is loaded.

    ``PIPELINE`` stays ``None`` when the training artefacts were missing at
    startup; every scoring entry point calls this guard first.
    """
    if PIPELINE is not None:
        return
    raise gr.Error(
        "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
    )
def score_table(table):
    """Score the rows entered in the interactive table tab.

    Validates that a model is loaded, normalises the payload into a
    DataFrame, then delegates to ``run_inference`` with the tuned threshold.
    """
    _ensure_model()
    frame = _convert_input(table, FEATURE_ORDER)
    # Drop the target column if the user pasted labelled data.
    columns_to_drop = None
    if TARGET_COLUMN:
        columns_to_drop = [TARGET_COLUMN]
    return run_inference(
        frame,
        PIPELINE,
        THRESHOLD,
        drop_columns=columns_to_drop,
        required_features=FEATURE_ORDER or None,
    )
def score_csv(upload):
    """Score every row of an uploaded CSV file.

    Depending on the Gradio version and the ``File`` component configuration,
    *upload* may be a plain filepath string or a tempfile-like object exposing
    ``.name``; both are accepted. Raises ``gr.Error`` when no file was given
    or no model is loaded.
    """
    _ensure_model()
    if upload is None:
        raise gr.Error("Veuillez déposer un fichier CSV.")
    # Newer Gradio returns a str path; older versions a NamedString/tempfile.
    path = upload if isinstance(upload, (str, Path)) else upload.name
    df = pd.read_csv(path)
    # Tolerate files that still contain the training target column.
    drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
    return run_inference(
        df,
        PIPELINE,
        THRESHOLD,
        drop_columns=drop_cols,
        required_features=FEATURE_ORDER or None,
    )
def predict_from_form(*values):
    """Score a single record captured from the per-feature text form.

    ``gr.Textbox`` components always deliver strings, so features the schema
    declares as numerical are coerced back to ``float`` before scoring;
    without a schema the raw values pass through unchanged (previous
    behaviour). Returns a JSON-friendly dict with the probability, the
    binary decision label, and the threshold used.
    """
    _ensure_model()
    if not FEATURE_ORDER:
        raise gr.Error("Impossible de générer le formulaire sans configuration des features.")
    numeric_features = set(SCHEMA.get("numerical_features", []))
    payload = {}
    for feature, value in zip(FEATURE_ORDER, values):
        if feature in numeric_features and isinstance(value, str) and value.strip():
            try:
                value = float(value)
            except ValueError:
                pass  # keep the raw text; the pipeline will surface the issue
        payload[feature] = value
    df = pd.DataFrame([payload])
    scored = run_inference(
        df,
        PIPELINE,
        THRESHOLD,
        required_features=FEATURE_ORDER or None,
    )
    row = scored.iloc[0]
    label = "Risque de départ" if int(row["prediction"]) == 1 else "Reste probable"
    return {
        "probability": round(float(row["proba_depart"]), 4),
        "decision": label,
        "threshold": THRESHOLD,
    }
# --- Artefact loading (runs once at module import time) ---
apply_brand_theme()
# Safe fallbacks used when no trained artefacts are available yet: the UI
# then renders a warning instead of crashing (see `_ensure_model`).
PIPELINE = None
METADATA: dict[str, Any] = {}
THRESHOLD = 0.5
TARGET_COLUMN: str | None = None
SCHEMA = _load_schema(SCHEMA_PATH)
try:
    PIPELINE = load_pipeline(MODEL_PATH)
    METADATA = load_metadata(METADATA_PATH)
    # Decision threshold tuned during training; keeps the 0.5 default if absent.
    THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
    TARGET_COLUMN = METADATA.get("target")
except FileNotFoundError as exc:
    # Missing artefacts are tolerated so the app can still start and explain
    # to the user how to train a model.
    logger.warning("Artéfact manquant: {}", exc)
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
# --- Gradio UI: three scoring entry points (form, table, CSV upload) ---
with gr.Blocks(title="Prédicteur d'attrition") as demo:
    gr.Markdown("# API Gradio – Prédiction de départ employé")
    gr.Markdown(
        "Le modèle applique le pipeline entraîné hors-notebook pour fournir une probabilité de départ ainsi qu'une décision binaire."
    )
    # Surface the model status up-front instead of failing on first click.
    if PIPELINE is None:
        gr.Markdown(
            "⚠️ **Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
        )
    else:
        gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")
    # Tab 1: one text input per feature, scored as a single record.
    with gr.Tab("Formulaire unitaire"):
        if not FEATURE_ORDER:
            gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
        else:
            form_inputs: list[gr.components.Component] = []  # type: ignore
            for feature in FEATURE_ORDER:
                form_inputs.append(
                    gr.Textbox(label=feature, placeholder=f"Saisir {feature.replace('_', ' ')}")
                )
            form_output = gr.JSON(label="Résultat")
            gr.Button("Prédire").click(
                fn=predict_from_form,
                inputs=form_inputs,
                outputs=form_output,
            )
    # Tab 2: editable grid for scoring several rows at once.
    with gr.Tab("Tableau interactif"):
        table_input = gr.Dataframe(
            headers=FEATURE_ORDER if FEATURE_ORDER else None,
            row_count=(1, "dynamic"),
            # Without a known feature list, fall back to 5 free-form columns.
            col_count=(len(FEATURE_ORDER), "dynamic") if FEATURE_ORDER else (5, "dynamic"),
            type="pandas",
        )
        table_output = gr.Dataframe(label="Prédictions", type="pandas")
        gr.Button("Scorer les lignes").click(
            fn=score_table,
            inputs=table_input,
            outputs=table_output,
        )
    # Tab 3: batch scoring of an uploaded CSV file.
    with gr.Tab("Fichier CSV"):
        file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
        file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
        gr.Button("Scorer le fichier").click(
            fn=score_csv,
            inputs=file_input,
            outputs=file_output,
        )
# Launch the local server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|