# Spaces: stephmnt / projet_05 — Sleeping — File size: 27,239 Bytes

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

import gradio as gr
import numpy as np
import pandas as pd
from loguru import logger
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine

from projet_05.branding import apply_brand_theme
from projet_05.modeling.predict import load_metadata, load_pipeline, run_inference
from projet_05.settings import load_settings
from projet_05 import dataset as ds

# Relative paths of the trained pipeline, its metadata and the feature schema.
MODEL_PATH = Path("models/best_model.joblib")
METADATA_PATH = Path("models/best_model_meta.json")
SCHEMA_PATH = Path("data/processed/schema.json")
# Engineered columns: they are removed from the user-facing inputs and
# recomputed by `_apply_derived_features`.
DERIVED_FEATURES = {
    "augmentation_par_revenu",
    "annee_sur_poste_par_experience",
    "nb_formation_par_experience",
    "score_moyen_satisfaction",
    "dern_promo_par_experience",
    "evolution_note",
}
# Columns averaged row-wise into `score_moyen_satisfaction`.
SATISFACTION_COLUMNS = [
    "satisfaction_employee_environnement",
    "satisfaction_employee_nature_travail",
    "satisfaction_employee_equipe",
    "satisfaction_employee_equilibre_pro_perso",
]
# Columns from which the first number found in the cell is extracted
# (e.g. "1, junior" -> 1) by `_normalize_categorical_values`.
NUMERIC_CODE_COLUMNS = ["niveau_hierarchique_poste", "niveau_education"]
# Placeholders: both sets are reassigned further down, once the schema and
# the model metadata have been loaded.
NUMERIC_FEATURES: set[str] = set()
CATEGORICAL_FEATURES: set[str] = set()

# Manual configuration of the input fields (label + placeholder).
# Each entry is keyed by the feature "name". Optional keys read by
# `_resolve_field_ui` / `_build_input_component`: "info", "component"
# ("textbox" when absent), and for dropdowns "choices", "default" and
# "allow_custom_value".
FIELD_UI_CONFIG = [
    {"name": "age", "label": "Âge", "placeholder": "Âge en années (ex : 35)"},
    {"name": "revenu_mensuel", "label": "Revenu mensuel (€)", "placeholder": "Montant mensuel en euros (ex : 4500)"},
    {"name": "annees_dans_l_entreprise", "label": "Années dans l'entreprise", "placeholder": "Ancienneté totale (ex : 4.5)"},
    {"name": "annees_dans_le_poste_actuel", "label": "Années sur le poste actuel", "placeholder": "Durée dans le poste (ex : 2)"},
    {
        "name": "annees_depuis_la_derniere_promotion",
        "label": "Années depuis la dernière promotion",
        "placeholder": "Durée depuis la dernière promotion (ex : 1)",
    },
    {
        "name": "distance_domicile_travail",
        "label": "Distance domicile-travail (km)",
        "placeholder": "Distance en kilomètres (ex : 12)",
    },
    {
        "name": "nombre_participation_pee",
        "label": "Nombre de participations PEE",
        "placeholder": "Nombre de participations (entier)",
    },
    {
        "name": "note_evaluation_actuelle",
        "label": "Note d'évaluation actuelle",
        "placeholder": "Score actuel (1 à 5)",
    },
    {
        "name": "note_evaluation_precedente",
        "label": "Note d'évaluation précédente",
        "placeholder": "Score précédent (1 à 5)",
    },
    {
        "name": "annees_depuis_le_changement_deposte",
        "label": "Années depuis le dernier changement de poste",
        "placeholder": "Temps écoulé (ex : 0 si jamais)",
    },
    {
        "name": "annee_experience_totale",
        "label": "Années d'expérience totale",
        "placeholder": "Expérience cumulative (ex : 8)",
    },
    {
        "name": "nb_formations_suivies",
        "label": "Nombre de formations suivies",
        "placeholder": "Total des formations (entier)",
    },
    {
        "name": "satisfaction_employee_environnement",
        "label": "Satisfaction environnement",
        "placeholder": "Note de 1 (faible) à 5 (forte)",
        "info": "Valeur comprise entre 1 et 5",
    },
    {
        "name": "satisfaction_employee_nature_travail",
        "label": "Satisfaction nature du travail",
        "placeholder": "Note de 1 à 5",
        "info": "Valeur comprise entre 1 et 5",
    },
    {
        "name": "satisfaction_employee_equipe",
        "label": "Satisfaction équipe",
        "placeholder": "Note de 1 à 5",
        "info": "Valeur comprise entre 1 et 5",
    },
    {
        "name": "satisfaction_employee_equilibre_pro_perso",
        "label": "Satisfaction équilibre pro/perso",
        "placeholder": "Note de 1 à 5",
        "info": "Valeur comprise entre 1 et 5",
    },
    {
        "name": "genre",
        "label": "Genre",
        "component": "dropdown",
        "choices": ["Femme", "Homme"],
        "info": "Sélectionnez le genre",
    },
    {
        "name": "departement",
        "label": "Département",
        "component": "dropdown",
        "choices": ["Commercial", "Consulting", "Ressources Humaines"],
    },
    {
        "name": "frequence_deplacement",
        "label": "Fréquence des déplacements",
        "component": "dropdown",
        "choices": ["Aucun", "Occasionnel", "Frequent"],
    },
    {
        "name": "statut_marital",
        "label": "Statut marital",
        "component": "dropdown",
        "choices": ["Célibataire", "Marié(e)", "Divorcé(e)"],
    },
    {
        "name": "poste",
        "label": "Poste occupé",
        "component": "dropdown",
        "choices": [
            "Cadre Commercial",
            "Assistant de Direction",
            "Consultant",
            "Tech Lead",
            "Manager",
            "Senior Manager",
            "Représentant Commercial",
            "Directeur Technique",
            "Ressources Humaines",
        ],
    },
    {
        "name": "niveau_hierarchique_poste",
        "label": "Niveau hiérarchique",
        "component": "dropdown",
        # Only the leading number of each choice survives normalization
        # (this column is listed in NUMERIC_CODE_COLUMNS).
        "choices": [
            "1, junior",
            "2",
            "3",
            "4",
            "5, senior",
        ],
        "info": "Valeur numérique issue du SIRH (1 à 5)",
    },
    {
        "name": "niveau_education",
        "label": "Niveau d'études",
        "component": "dropdown",
        # Same convention as above: the number is extracted, the text is a hint.
        "choices": [
            "1, licence",
            "2",
            "3",
            "4",
            "5, master",
        ],
        "info": "Indice numérique (1 à 5) figurant dans les exports bruts",
    },
    {
        "name": "domaine_etude",
        "label": "Domaine d'étude",
        "component": "dropdown",
        "choices": ["Entrepreunariat", "Infra & Cloud", "Marketing", "Ressources Humaines", "Transformation Digitale"],
    },
    {
        "name": "heure_supplementaires",
        "label": "Heures supplémentaires",
        "component": "dropdown",
        "choices": ["Oui", "Non"],
    },
]
# Index of the configuration entries by feature name, for constant-time lookup.
FIELD_UI_LOOKUP = {cfg["name"]: cfg for cfg in FIELD_UI_CONFIG}
# Settings are optional at import time: on any loading failure SETTINGS stays
# None. `_ensure_settings` then raises a user-facing error and
# `_log_predictions` returns without logging anything.
# NOTE(review): every exception is swallowed without a trace, so a malformed
# configuration cannot be told apart from a missing one — consider logging it.
try:
    SETTINGS = load_settings()
except Exception:  # pragma: no cover - remains optional when config absent
    SETTINGS = None
# Database engine created lazily by `_get_db_engine` and reused afterwards.
CACHED_ENGINE: Engine | None = None
# Per-column alias tables used by `_normalize_categorical_values`: string
# cells are stripped and lower-cased before the lookup, and values without an
# alias are kept as typed (stripped). Keys must therefore be lower-case.
CATEGORICAL_NORMALIZERS: dict[str, dict[str, str]] = {
    "genre": {
        "f": "F",
        "femme": "F",
        "m": "M",
        "homme": "M",
    },
    "statut_marital": {
        "célibataire": "Célibataire",
        "celibataire": "Célibataire",
        "marié(e)": "Marié(e)",
        "marie(e)": "Marié(e)",
        "marie": "Marié(e)",
        "marié": "Marié(e)",
        "divorcé(e)": "Divorcé(e)",
        "divorce(e)": "Divorcé(e)",
    },
    "departement": {
        "commercial": "Commercial",
        "consulting": "Consulting",
        "ressources humaines": "Ressources Humaines",
    },
    "poste": {
        "cadre commercial": "Cadre Commercial",
        "assistant de direction": "Assistant de Direction",
        "consultant": "Consultant",
        "tech lead": "Tech Lead",
        "manager": "Manager",
        "senior manager": "Senior Manager",
        "représentant commercial": "Représentant Commercial",
        "representant commercial": "Représentant Commercial",
        "directeur technique": "Directeur Technique",
        "ressources humaines": "Ressources Humaines",
    },
    "frequence_deplacement": {
        "aucun": "Aucun",
        "aucune": "Aucun",
        "occasionnel": "Occasionnel",
        "occasionnelle": "Occasionnel",
        "frequent": "Frequent",
        "fréquent": "Frequent",
    },
    "domaine_etude": {
        "entrepreunariat": "Entrepreunariat",
        "infra & cloud": "Infra & Cloud",
        "infra et cloud": "Infra & Cloud",
        "marketing": "Marketing",
        "ressources humaines": "Ressources Humaines",
        "transformation digitale": "Transformation Digitale",
    },
    "heure_supplementaires": {
        "oui": "Oui",
        "o": "Oui",
        "y": "Oui",
        "non": "Non",
        "n": "Non",
    },
    # NOTE(review): the two columns below are also listed in
    # NUMERIC_CODE_COLUMNS, so after this lookup their values go through a
    # number extraction; a label without digits (e.g. "Junior") ends up as
    # NaN — confirm this is intended.
    "niveau_hierarchique_poste": {
        "junior": "Junior",
        "confirmé": "Confirmé",
        "confirme": "Confirmé",
        "direction": "Direction",
        "senior": "Senior",
    },
    "niveau_education": {
        "licence": "Licence",
        "master": "Master",
        "doctorat": "Doctorat",
        "bts": "BTS",
    },
}


def _load_schema(path: Path) -> dict[str, Any]:
    """Load the schema definition stored as JSON.

    Args:
        path: Path to the schema.json file.

    Returns:
        A dictionary describing the schema or an empty dict if the file is missing.
    """
    if not path.exists():
        return {}
    return json.loads(path.read_text(encoding="utf-8"))


def _infer_features(metadata: dict, schema: dict, pipeline) -> list[str]:
    """Infer the ordered list of features expected by the model.

    Args:
        metadata: Metadata produced during training.
        schema: Schema derived from `features.py`.
        pipeline: Loaded sklearn pipeline (optional).

    Returns:
        List of feature names in the order expected by the model.
    """
    if schema:
        candidates = schema.get("numerical_features", []) + schema.get("categorical_features", [])
        if candidates:
            return candidates
    features = metadata.get("features", {})
    explicit = (features.get("numerical") or []) + (features.get("categorical") or [])
    if explicit:
        return explicit
    if pipeline is not None and hasattr(pipeline, "feature_names_in_"):
        return list(pipeline.feature_names_in_)
    return []


def _ensure_settings():
    """Return the loaded settings, or stop with a user-facing error.

    Raises:
        gr.Error: If no settings were loaded at import time.
    """
    if SETTINGS is not None:
        return SETTINGS
    raise gr.Error(
        "Configuration introuvable. Placez `projet_05/settings.yml` dans le dépôt ou renseignez PROJET05_SETTINGS."
    )


def _get_db_engine(settings: Settings) -> Engine: # pyright: ignore[reportUndefinedVariable]
    """Return the module-wide database engine, creating it on first use.

    Once an engine has been cached it is returned as-is, whatever settings
    are passed on later calls.

    Args:
        settings: Object exposing the database URL as `db_url`.

    Raises:
        RuntimeError: If no engine is cached yet and `db_url` is empty.
    """
    global CACHED_ENGINE
    if CACHED_ENGINE is None:
        if not settings.db_url:
            raise RuntimeError(
                "Aucune URL de base de données n'a été fournie. Configurez `database.url` dans settings.yml."
            )
        CACHED_ENGINE = create_engine(settings.db_url, future=True)
    return CACHED_ENGINE


def _convert_input(payload: Any, headers: list[str]) -> pd.DataFrame:
    """Normalize any user input into a validated DataFrame.

    Args:
        payload: Raw table coming from Gradio (DataFrame, list, etc.).
        headers: Expected column names.

    Returns:
        A sanitized DataFrame.

    Raises:
        gr.Error: If no valid row is provided.
    """
    if isinstance(payload, pd.DataFrame):
        df = payload.copy()
    elif payload is None:
        df = pd.DataFrame(columns=headers)
    else:
        df = pd.DataFrame(payload, columns=headers if headers else None)
    df = df.dropna(how="all")
    if df.empty:
        raise gr.Error("Merci de saisir au moins une ligne complète.")
    return df


def _read_uploaded_csv(upload, label: str) -> pd.DataFrame:
    """Load an uploaded CSV file or raise a user-friendly error."""

    if upload is None:
        raise gr.Error(f"Veuillez déposer le fichier {label}.")
    try:
        return pd.read_csv(upload.name)
    except Exception as exc:  # pragma: no cover - delegated to pandas
        raise gr.Error(f"Impossible de lire le fichier {label}: {exc}") from exc


def _resolve_field_ui(feature: str) -> tuple[str, str, str | None, str, dict[str, Any]]:
    """Look up how a feature should be presented in the form.

    Returns a `(label, placeholder, info, component, config)` tuple. Missing
    or empty labels fall back to the feature name with underscores turned into
    spaces, missing placeholders to "Saisir <label>", and the component type
    to "textbox".
    """

    entry = FIELD_UI_LOOKUP.get(feature, {})

    label = entry.get("label")
    if not label:
        label = feature.replace("_", " ").capitalize()

    hint = entry.get("placeholder")
    if not hint:
        hint = f"Saisir {label.lower()}"

    return label, hint, entry.get("info"), entry.get("component", "textbox"), entry


def _build_input_component(feature: str) -> gr.components.Component: # type: ignore
    """Create the Gradio input matching a feature's UI configuration.

    A dropdown is built when the configuration asks for one (with its
    choices, default value and custom-value policy); any other component type
    yields a textbox carrying the placeholder.
    """

    label, placeholder, info, component, config = _resolve_field_ui(feature)
    if component != "dropdown":
        return gr.Textbox(label=label, placeholder=placeholder, info=info)
    return gr.Dropdown(
        label=label,
        choices=config.get("choices") or [],
        value=config.get("default"),
        info=info,
        allow_custom_value=config.get("allow_custom_value", False),
    )


def _normalize_categorical_values(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with user-friendly values turned into model inputs.

    Three passes are applied, each only to the columns present in `df`:
    alias lookup for the columns of CATEGORICAL_NORMALIZERS, extraction of
    the first number for NUMERIC_CODE_COLUMNS, then numeric coercion (invalid
    values become NaN) for every numeric, derived and numeric-code column.
    """

    result = df.copy()

    def _canonical(raw, aliases: dict[str, str]):
        # Missing values pass through untouched.
        if pd.isna(raw):
            return raw
        if not isinstance(raw, str):
            return aliases.get(raw, raw)
        stripped = raw.strip()
        # Unknown spellings are kept as typed, minus surrounding whitespace.
        return aliases.get(stripped.lower(), stripped)

    for name in CATEGORICAL_NORMALIZERS:
        if name in result.columns:
            aliases = CATEGORICAL_NORMALIZERS[name]
            result[name] = result[name].apply(lambda cell, table=aliases: _canonical(cell, table))

    for name in NUMERIC_CODE_COLUMNS:
        if name not in result.columns:
            continue
        as_text = result[name].astype(str)
        first_number = as_text.str.extract(r"(-?\d+(?:[.,]\d+)?)")[0]
        with_dot = first_number.str.replace(",", ".", regex=False)
        result[name] = pd.to_numeric(with_dot, errors="coerce")

    to_coerce = NUMERIC_FEATURES.union(DERIVED_FEATURES).union(NUMERIC_CODE_COLUMNS)
    for name in to_coerce:
        if name in result.columns:
            result[name] = pd.to_numeric(result[name], errors="coerce")
    return result


def _apply_derived_features(df: pd.DataFrame) -> pd.DataFrame:
    """Recompute engineered ratios so end-users do not have to provide them.

    The input is first passed through `_normalize_categorical_values`; the
    derived columns are then added when their source columns are present:
    four ratios, the mean of the satisfaction columns and the difference
    between the current and previous evaluation notes.

    Args:
        df: Raw user-provided rows.

    Returns:
        The normalized frame with the derived columns added and `pd.NA`
        replaced by `np.nan`.
    """

    enriched = _normalize_categorical_values(df)

    def _safe_ratio(numerator: str, denominator: str, output: str) -> None:
        # Skip silently when one of the operands is not available.
        if numerator not in enriched.columns or denominator not in enriched.columns:
            return
        numerator_series = pd.to_numeric(enriched[numerator], errors="coerce")
        denominator_series = pd.to_numeric(enriched[denominator], errors="coerce")
        # Mask zeros with NaN (not pd.NA): the ratio stays a float column
        # instead of degrading to object dtype, and a zero denominator gives
        # a missing value rather than inf.
        denominator_series = denominator_series.mask(denominator_series == 0)
        enriched[output] = numerator_series / denominator_series

    prev_raise_col = "augementation_salaire_precedente"
    if prev_raise_col in enriched:
        # Accept values such as "11 %" or "5,5%" and turn them into fractions.
        # NOTE(review): assumes the column is always expressed in percent —
        # confirm for inputs that were already converted upstream.
        normalized = (
            enriched[prev_raise_col]
            .astype(str)
            .str.replace("%", "", regex=False)
            .str.replace(",", ".", regex=False)
            .str.strip()
        )
        enriched[prev_raise_col] = pd.to_numeric(normalized, errors="coerce") / 100

    _safe_ratio(prev_raise_col, "revenu_mensuel", "augmentation_par_revenu")
    _safe_ratio("annees_dans_le_poste_actuel", "annee_experience_totale", "annee_sur_poste_par_experience")
    _safe_ratio("nb_formations_suivies", "annee_experience_totale", "nb_formation_par_experience")
    _safe_ratio("annees_depuis_la_derniere_promotion", "annee_experience_totale", "dern_promo_par_experience")

    existing_sats = [col for col in SATISFACTION_COLUMNS if col in enriched.columns]
    if existing_sats:
        # Row-wise mean over whichever satisfaction columns were provided.
        enriched["score_moyen_satisfaction"] = pd.DataFrame(
            {col: pd.to_numeric(enriched[col], errors="coerce") for col in existing_sats}
        ).mean(axis=1)

    if {"note_evaluation_actuelle", "note_evaluation_precedente"}.issubset(enriched.columns):
        enriched["evolution_note"] = pd.to_numeric(
            enriched["note_evaluation_actuelle"], errors="coerce"
        ) - pd.to_numeric(enriched["note_evaluation_precedente"], errors="coerce")

    return enriched.replace({pd.NA: np.nan})


def _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload) -> pd.DataFrame:
    """Read the three raw uploads and inner-join them on the employee id.

    Each file is read, passed through `ds.clean_text_values`, and its id
    column harmonized; the evaluation and sondage files first have their own
    id column ("eval_number" / "code_sondage") renamed to the configured one.

    Raises:
        gr.Error: If settings are missing, a file is missing, unreadable,
            empty or lacks the id column, or the join yields no row.
    """

    settings = _ensure_settings()

    sirh_clean = ds.clean_text_values(_read_uploaded_csv(sirh_upload, "SIRH"))
    sirh = sirh_clean.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)

    evaluation_clean = ds.clean_text_values(_read_uploaded_csv(evaluation_upload, "évaluation"))
    evaluation_renamed = evaluation_clean.pipe(ds._rename_column, "eval_number", settings.col_id)
    evaluation = evaluation_renamed.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)

    sond_clean = ds.clean_text_values(_read_uploaded_csv(sond_upload, "sondage"))
    sond_renamed = sond_clean.pipe(ds._rename_column, "code_sondage", settings.col_id)
    sond = sond_renamed.pipe(ds._harmonize_id_column, settings.col_id, digits_only=True)

    # Validate every source before attempting the join.
    for label, frame in (("SIRH", sirh), ("évaluation", evaluation), ("sondage", sond)):
        if frame.empty:
            raise gr.Error(f"Le fichier {label} est vide ou invalide.")
        if settings.col_id not in frame.columns:
            raise gr.Error(f"La colonne {settings.col_id} est absente du fichier {label}.")

    with_evaluation = sirh.merge(evaluation, on=settings.col_id, how="inner")
    merged = with_evaluation.merge(sond, on=settings.col_id, how="inner")
    if merged.empty:
        raise gr.Error("Aucune ligne résultante après fusion des trois fichiers (jointure INNER vide).")
    return merged


def _log_predictions(source: str, raw_inputs: pd.DataFrame, scored: pd.DataFrame) -> None:
    """Append one row per scored line to the `prediction_logs` table.

    Nothing happens when no settings or no database URL are configured.
    Connection and write failures are logged and swallowed so that scoring
    itself is not interrupted by them.

    Args:
        source: Name of the entry point that produced the predictions.
        raw_inputs: Rows as submitted by the user, stored as a JSON payload.
        scored: Inference output, read for the id, probability and decision.
    """

    if SETTINGS is None or not SETTINGS.db_url:
        return
    settings = _ensure_settings()
    try:
        engine = _get_db_engine(settings)
    except Exception as exc:  # pragma: no cover - logging best effort
        logger.error("Connexion impossible pour logger les interactions: {}", exc)
        return

    # Align the submitted rows on the scored ones so they can be paired by label.
    payload = raw_inputs.reindex(scored.index).fillna(value=pd.NA)
    col_id = settings.col_id

    def _to_record(idx, row) -> dict[str, Any]:
        submitted = payload.loc[idx].to_dict() if idx in payload.index else {} # type: ignore
        return {
            "id_employee": row.get(col_id),
            "probability": float(row.get("proba_depart", 0.0)),
            "decision": int(row.get("prediction", 0)),
            "threshold": THRESHOLD,
            "source": source,
            "payload": json.dumps(submitted, ensure_ascii=False, default=str),
        }

    records = [_to_record(idx, row) for idx, row in scored.iterrows()]
    if not records:
        return

    try:
        pd.DataFrame(records).to_sql(
            "prediction_logs",
            engine,
            schema=settings.db_schema,
            if_exists="append",
            index=False,
            method="multi",
        )
    except Exception as exc:  # pragma: no cover - logging best effort
        logger.error("Impossible de journaliser les interactions: {}", exc)

def _ensure_model():
    """Stop with a user-facing error unless a pipeline has been loaded.

    Raises:
        gr.Error: If the module-level PIPELINE is None.
    """
    if PIPELINE is not None:
        return
    raise gr.Error(
        "Aucun modèle entrainé n'a été trouvé. Lancez `python projet_05/modeling/train.py` puis relancez l'application."
    )


def score_table(table):
    """Score the rows typed into the interactive table.

    The rows are validated, enriched with the derived features, scored with
    the loaded pipeline, and the interaction is logged under the
    "interactive_table" source. Returns the scored DataFrame.
    """
    _ensure_model()
    submitted = _convert_input(table, INPUT_FEATURES)
    # Snapshot of what the user typed, kept for the prediction log.
    snapshot = submitted.copy()
    enriched = _apply_derived_features(submitted)
    inference_options = {
        "drop_columns": [TARGET_COLUMN] if TARGET_COLUMN else None,
        "required_features": FEATURE_ORDER or None,
    }
    predictions = run_inference(enriched, PIPELINE, THRESHOLD, **inference_options)
    _log_predictions("interactive_table", snapshot, predictions)
    return predictions


def score_csv(upload):
    """Score a CSV uploaded by the user.

    Args:
        upload: File object handed over by the Gradio file component.

    Returns:
        The scored DataFrame produced by `run_inference`.

    Raises:
        gr.Error: If no model is loaded, no file was provided, or the file
            cannot be read as CSV.
    """
    _ensure_model()
    if upload is None:
        raise gr.Error("Veuillez déposer un fichier CSV.")
    # Use the shared reader so that an unreadable file is reported as a
    # user-facing gr.Error, like the raw-files tab does.
    df = _read_uploaded_csv(upload, "CSV")
    original = df.copy()
    df = _apply_derived_features(df)
    drop_cols = [TARGET_COLUMN] if TARGET_COLUMN else None
    scored = run_inference(
        df,
        PIPELINE,
        THRESHOLD,
        drop_columns=drop_cols,
        required_features=FEATURE_ORDER or None,
    )
    _log_predictions("csv_file", original, scored)
    return scored


def score_raw_files(sirh_upload, evaluation_upload, sond_upload):
    """Merge the three raw uploads (SIRH, évaluation, sondage) and score them.

    The merged rows are enriched with the derived features, scored with the
    loaded pipeline, and logged under the "raw_files" source. Returns the
    scored DataFrame.
    """

    _ensure_model()
    merged = _merge_raw_sources(sirh_upload, evaluation_upload, sond_upload)
    # Copy of the merged rows, kept for the prediction log.
    snapshot = merged.copy()
    enriched = _apply_derived_features(merged)
    excluded = [TARGET_COLUMN] if TARGET_COLUMN else None
    predictions = run_inference(
        enriched,
        PIPELINE,
        THRESHOLD,
        drop_columns=excluded,
        required_features=FEATURE_ORDER or None,
    )
    _log_predictions("raw_files", snapshot, predictions)
    return predictions


def predict_from_form(*values):
    """Score the single row entered in the form tab.

    Args:
        *values: Form values, in the same order as INPUT_FEATURES.

    Returns:
        A dict with the rounded probability, the textual decision and the
        decision threshold.

    Raises:
        gr.Error: If no model is loaded or no input feature is configured.
    """
    _ensure_model()
    if not INPUT_FEATURES:
        raise gr.Error("Impossible de générer le formulaire sans configuration des features.")

    single_row = pd.DataFrame([dict(zip(INPUT_FEATURES, values))])
    # Copy of the submitted values, kept for the prediction log.
    snapshot = single_row.copy()
    enriched = _apply_derived_features(single_row)
    predictions = run_inference(
        enriched,
        PIPELINE,
        THRESHOLD,
        required_features=FEATURE_ORDER or None,
    )
    _log_predictions("form", snapshot, predictions)

    outcome = predictions.iloc[0]
    if int(outcome["prediction"]) == 1:
        label = "Risque de départ"
    else:
        label = "Reste probable"
    return {
        "probability": round(float(outcome["proba_depart"]), 4),
        "decision": label,
        "threshold": THRESHOLD,
    }


# Artefact loading (runs once, at import time).
# NOTE(review): `apply_brand_theme` comes from projet_05.branding and is not
# visible here; presumably it configures the visual theme — confirm.
apply_brand_theme()

# Defaults used when the artefacts cannot be loaded.
PIPELINE = None
METADATA: dict[str, Any] = {}
THRESHOLD = 0.5
TARGET_COLUMN: str | None = None
SCHEMA = _load_schema(SCHEMA_PATH)

# Only a missing file is tolerated: the app then starts without a model.
# Any other loading error propagates and stops the import.
try:
    PIPELINE = load_pipeline(MODEL_PATH)
    METADATA = load_metadata(METADATA_PATH)
    THRESHOLD = float(METADATA.get("best_threshold", THRESHOLD))
    TARGET_COLUMN = METADATA.get("target")
except FileNotFoundError as exc:
    logger.warning("Artéfact manquant: {}", exc)

# Features expected by the model, and the subset the user has to provide
# (derived features are recomputed by the app).
FEATURE_ORDER = _infer_features(METADATA, SCHEMA, PIPELINE)
INPUT_FEATURES = [feature for feature in FEATURE_ORDER if feature not in DERIVED_FEATURES]
# If every feature is a derived one, fall back to asking for all of them.
if not INPUT_FEATURES:
    INPUT_FEATURES = FEATURE_ORDER
# Feature typing: the schema takes precedence, the training metadata is the
# fallback when the schema lists nothing.
numeric_from_schema = set(SCHEMA.get("numerical_features", []))
categorical_from_schema = set(SCHEMA.get("categorical_features", []))
if not numeric_from_schema:
    numeric_from_schema = set((METADATA.get("features", {}).get("numerical") or []))
if not categorical_from_schema:
    categorical_from_schema = set((METADATA.get("features", {}).get("categorical") or []))
NUMERIC_FEATURES = numeric_from_schema
CATEGORICAL_FEATURES = categorical_from_schema

# User interface: one tab per way of submitting data (form and interactive
# table, merged CSV, three raw CSVs).
with gr.Blocks(title="Prédicteur d'attrition") as demo:
    gr.Markdown("# OCR Projet 5 – Prédiction de départ employé")
    # Project badges (release, deployment workflow status, documentation).
    gr.HTML(
        """
        <div style="display:flex; gap:0.5rem; flex-wrap:wrap;">
            <a href="https://github.com/stephmnt/OCR_Projet05/releases" target="_blank" rel="noreferrer">
                <img src="https://img.shields.io/github/v/release/stephmnt/OCR_Projet05" alt="GitHub Release" />
            </a>
            <a href="https://github.com/stephmnt/OCR_Projet05/actions/workflows/deploy.yml" target="_blank" rel="noreferrer">
                <img src="https://img.shields.io/github/actions/workflow/status/stephmnt/OCR_Projet05/deploy.yml" alt="GitHub Actions Workflow Status" />
            </a>
            <a href="https://stephmnt.github.io/OCR_Projet05" target="_blank" rel="noreferrer">
                <img src="https://img.shields.io/badge/MkDocs-526CFE?logo=materialformkdocs&logoColor=fff" alt="MkDocs" />
            </a>
        </div>
        """
    )
    gr.Markdown(
        "Le modèle fournit une probabilité de départ ainsi qu'une décision binaire."
    )

    # Tell the user up front whether scoring is possible at all.
    if PIPELINE is None:
        gr.Markdown(
            "**Aucun modèle disponible.** Lancez les scripts `dataset.py`, `features.py` puis `modeling/train.py`."
        )
    else:
        gr.Markdown(f"Seuil de décision actuel : **{THRESHOLD:.2f}**")

    with gr.Tab("Formulaire unitaire"):
        if not INPUT_FEATURES:
            gr.Markdown("Aucune configuration de features détectée. Utilisez l'onglet CSV pour scorer vos données.")
        else:
            # One input per feature, in the order `predict_from_form` expects.
            form_inputs: list[gr.components.Component] = [ # type: ignore
                _build_input_component(feature) for feature in INPUT_FEATURES
            ]
            form_output = gr.JSON(label="Résultat")
            gr.Button("Prédire").click(
                fn=predict_from_form,
                inputs=form_inputs,
                outputs=form_output,
            )
        # The multi-row table is offered in the same tab, with or without
        # a feature configuration.
        table_input = gr.Dataframe(
            headers=INPUT_FEATURES if INPUT_FEATURES else None,
            row_count=(1, "dynamic"),
            col_count=(len(INPUT_FEATURES), "dynamic") if INPUT_FEATURES else (5, "dynamic"),
            type="pandas",
        )
        table_output = gr.Dataframe(label="Prédictions", type="pandas")
        gr.Button("Scorer les lignes").click(
            fn=score_table,
            inputs=table_input,
            outputs=table_output,
        )

    with gr.Tab("Fichier CSV fusionné"):
        gr.Markdown("Un exemple de fichier à importer est disponible dans le dépôt github : [`references/sample_employees.csv`](https://github.com/stephmnt/OCR_Projet05/blob/main/references/sample_employees.csv)")
        file_input = gr.File(file_types=[".csv"], label="Déposez votre fichier CSV")
        file_output = gr.Dataframe(label="Résultats CSV", type="pandas")
        gr.Button("Scorer le fichier").click(
            fn=score_csv,
            inputs=file_input,
            outputs=file_output,
        )

    with gr.Tab("Fichiers non-mergés"):
        # The two literals are concatenated: the first one ends with a space
        # so the sentences do not run together, and the link is a single,
        # well-formed Markdown link.
        gr.Markdown(
            "Téléversez directement les trois fichiers bruts (SIRH, évaluation, sondage), l'application reproduira automatiquement la fusion puis le scoring. "
            "Des tableaux sont disponibles dans le dépôt github pour tester cette fonctionnalité : [`data/raw`](https://github.com/stephmnt/OCR_Projet05/blob/main/data/raw/)."
        )
        sirh_input = gr.File(file_types=[".csv"], label="Fichier SIRH")
        evaluation_input = gr.File(file_types=[".csv"], label="Fichier Évaluation")
        sond_input = gr.File(file_types=[".csv"], label="Fichier Sondage")
        raw_output = gr.Dataframe(label="Résultats fusion automatique", type="pandas")
        gr.Button("Fusionner et scorer").click(
            fn=score_raw_files,
            inputs=[sirh_input, evaluation_input, sond_input],
            outputs=raw_output,
        )


# Start the app only when the module is executed as a script.
if __name__ == "__main__":
    demo.launch()